diff --git a/.gitignore b/.gitignore
index 3bfe751..2a0ee24 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
docs/generated/
docs/auto_examples/
docs/modules/
+docs/sg_execution_times.rst
# Byte-compiled / optimized / DLL files
__pycache__/
diff --git a/MANIFEST.in b/MANIFEST.in
index 09d993a..8bd58d4 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,5 @@
include README.rst LICENSE environment.yml requirements.txt
-recursive-include netneurotools/data *
include versioneer.py
+include netneurotools/datasets/datasets.json
+include netneurotools/datasets/references.json
+include netneurotools/datasets/netneurotools.bib
\ No newline at end of file
diff --git a/README.rst b/README.rst
index e8a6f50..49b7f88 100644
--- a/README.rst
+++ b/README.rst
@@ -26,53 +26,60 @@ netneurotools: Tools for network neuroscience
|
This toolbox is a collection of functions written in Python that get frequent
-usage in the `Network Neuroscience Lab `_, housed in
+usage in the `Network Neuroscience Lab `_, housed in
the `Brain Imaging Centre `_ at
`McGill University `_.
-.. _installation:
-
-Installation
-------------
-
-Install directly from PyPi with :code:`pip install netneurotools` or install the main branch with
-
-.. code-block:: bash
-
- git clone https://github.com/netneurolab/netneurotools.git
- cd netneurotools
- pip install .
.. _features:
Features
--------
-* Network neuroscience metrics: up-to-date and optimized
+Netneurotools provides a wide range of tools for network neuroscience research.
- * Network communication
- * Null networks
+* A range of useful dataset fetchers
-* Brain plotting functions: easy to use and customize
+* Network construction: empirical and surrogate
- * Surface visualization
- `plot_fsaverage `_
- and `plot_fslr `_
- * 3D point brain `plot_point_brain `_
- * Sorted communities `plot_mod_heatmap `_
+* Network (graph) metrics calculation: up-to-date and optimized
-* Statistics functions
+* Brain and network visualization
- * Dominance analysis `get_dominance_stats `_
+* Optimized statistics routines
-* Fetchers for common datasets
+* Convenient interface for external tools
-* Utilities for working with FreeSurfer and CIVET
+* And much more!
Check out our `documentation `_
for more information!
+
+.. _installation:
+
+Installation
+------------
+
+You can install directly from PyPI with :code:`pip install netneurotools`.
+
+This package is under active development. We recommend installing the latest version
+with
+
+.. code-block:: bash
+
+ pip install git+https://github.com/netneurolab/netneurotools.git
+
+
+If you are looking for the earlier version of the toolbox before the recent breaking changes,
+you can install it with
+
+.. code-block:: bash
+
+ pip install git+https://github.com/netneurolab/netneurotools.git@0.2.X
+
+
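+To verify that the installation worked, a minimal check (a sketch; it simply prints
+the installed version) is:
+
+.. code-block:: python
+
+    import netneurotools
+    print(netneurotools.__version__)
+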
.. _development:
Development
@@ -85,6 +92,7 @@ you've found a bug, are experiencing a problem, or have a question, create a
new `issue `_ with some
information about it and one of our team members will do our best to help you.
+
.. _licensing:
License Information
diff --git a/dev_environment.yml b/dev_environment.yml
deleted file mode 100644
index dd89823..0000000
--- a/dev_environment.yml
+++ /dev/null
@@ -1,26 +0,0 @@
-name: netneurotools
-channels:
- - defaults
- - conda-forge
-dependencies:
- - python>=3.6
- - flake8
- - matplotlib
- - mayavi
- - nibabel
- - nilearn
- - numba
- - "numpy>=1.16"
- - pandas
- - pip
- - "pytest>=3.6"
- - pytest-cov
- - scikit-learn
- - "scipy>=1.4.0"
- - "sphinx>=1.2"
- - sphinx-gallery
- - sphinx_rtd_theme
- - versioneer
- - pip:
- - git+https://github.com/aestrivex/bctpy.git#egg=bctpy
- - pysurfer
diff --git a/docs/api.rst b/docs/api.rst
index a5adc5c..a537069 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -8,71 +8,106 @@ Python Reference API
.. contents:: **List of modules**
:local:
-.. _ref_network:
+.. _ref_datasets:
-:mod:`netneurotools.networks` - Constructing networks
------------------------------------------------------
+:mod:`netneurotools.datasets` - Automatic dataset fetching
+----------------------------------------------------------
-.. automodule:: netneurotools.networks
+.. automodule:: netneurotools.datasets
:no-members:
:no-inherited-members:
-.. currentmodule:: netneurotools.networks
+.. currentmodule:: netneurotools.datasets
+
+To download templates
.. autosummary::
:template: function.rst
:toctree: generated/
- func_consensus
- struct_consensus
- threshold_network
- binarize_network
- match_length_degree_distribution
- randmio_und
- strength_preserving_rand_sa
- strength_preserving_rand_sa_mse_opt
- strength_preserving_rand_sa_dir
-.. _ref_modularity:
+ fetch_fsaverage
+ fetch_fsaverage_curated
+ fetch_hcp_standards
+ fetch_fslr_curated
+ fetch_civet
+ fetch_civet_curated
+ fetch_conte69
+ fetch_yerkes19
-:mod:`netneurotools.modularity` - Calculating network modularity
-----------------------------------------------------------------
+To download atlases
-.. automodule:: netneurotools.modularity
- :no-members:
- :no-inherited-members:
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
-.. currentmodule:: netneurotools.modularity
+ fetch_cammoun2012
+ fetch_schaefer2018
+ fetch_mmpall
+ fetch_pauli2018
+ fetch_ye2020
+ fetch_voneconomo
+
+To download project-related data
.. autosummary::
:template: function.rst
:toctree: generated/
- consensus_modularity
- zrand
- get_modularity
- get_modularity_z
- get_modularity_sig
+ fetch_vazquez_rodriguez2019
+ fetch_mirchi2018
+ fetch_hansen_manynetworks
+ fetch_hansen_receptors
+ fetch_hansen_genescognition
+ fetch_hansen_brainstemfc
+ fetch_shafiei_megfmrimapping
+ fetch_shafiei_megdynamics
+ fetch_suarez_mami
+ fetch_famous_gmat
+ fetch_neurosynth
+
-.. _ref_cluster:
+.. _ref_network:
-:mod:`netneurotools.cluster` - Working with clusters
-----------------------------------------------------
+:mod:`netneurotools.networks` - Constructing networks
+-----------------------------------------------------
-.. automodule:: netneurotools.cluster
+.. automodule:: netneurotools.networks
:no-members:
:no-inherited-members:
-.. currentmodule:: netneurotools.cluster
+.. currentmodule:: netneurotools.networks
+
+To construct consensus networks
.. autosummary::
:template: function.rst
:toctree: generated/
- find_consensus
- match_assignments
- reorder_assignments
- match_cluster_labels
+ func_consensus
+ struct_consensus
+
+To randomize networks
+
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
+
+ randmio_und
+ match_length_degree_distribution
+ strength_preserving_rand_sa
+ strength_preserving_rand_sa_mse_opt
+ strength_preserving_rand_sa_dir
+
+Convenience functions
+
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
+
+ binarize_network
+ threshold_network
+
.. _ref_plotting:
@@ -85,50 +120,42 @@ Python Reference API
.. currentmodule:: netneurotools.plotting
+Pyvista
+
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
+
+ pv_plot_surface
+
+PySurfer (deprecated)
+
.. autosummary::
:template: function.rst
:toctree: generated/
- sort_communities
- plot_mod_heatmap
plot_conte69
plot_fslr
plot_fsaverage
plot_fsvertex
- plot_point_brain
-.. _ref_stats:
+matplotlib
-:mod:`netneurotools.stats` - General statistics functions
----------------------------------------------------------
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
-.. automodule:: netneurotools.stats
- :no-members:
- :no-inherited-members:
+ plot_point_brain
+ plot_mod_heatmap
-.. currentmodule:: netneurotools.stats
+Fun color & colormap stuff
.. autosummary::
:template: function.rst
:toctree: generated/
- gen_spinsamples
- residualize
- get_mad_outliers
- efficient_pearsonr
- permtest_1samp
- permtest_rel
- permtest_pearsonr
- get_dominance_stats
- network_pearsonr
- network_pearsonr_numba
- network_pearsonr_pairwise
- effective_resistance
- network_polarisation
- network_variance
- network_variance_numba
- network_covariance
- network_covariance_numba
+ available_cmaps
+
.. _ref_metrics:
@@ -141,146 +168,172 @@ Python Reference API
.. currentmodule:: netneurotools.metrics
+Brain network metrics
+
.. autosummary::
:template: function.rst
:toctree: generated/
- _binarize
degrees_und
degrees_dir
distance_wei_floyd
retrieve_shortest_path
- communicability_bin
- communicability_wei
- rich_feeder_peripheral
navigation_wu
get_navigation_path_length
- search_information
+ communicability_bin
+ communicability_wei
path_transitivity
- flow_graph
+ search_information
mean_first_passage_time
diffusion_efficiency
resource_efficiency_bin
+ flow_graph
+ assortativity
matching_ind_und
- _graph_laplacian
-
-.. _ref_datasets:
+ rich_feeder_peripheral
-:mod:`netneurotools.datasets` - Automatic dataset fetching
-----------------------------------------------------------
+Network spreading
-.. automodule:: netneurotools.datasets
- :no-members:
- :no-inherited-members:
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
-.. currentmodule:: netneurotools.datasets
+ simulate_atrophy
-Functions to download atlases and templates
+Statistical network metrics
.. autosummary::
:template: function.rst
:toctree: generated/
- fetch_cammoun2012
- fetch_civet
- fetch_conte69
- fetch_fsaverage
- fetch_pauli2018
- fetch_schaefer2018
- fetch_hcp_standards
- fetch_voneconomo
+ network_pearsonr
+ network_pearsonr_numba
+ network_pearsonr_pairwise
+ effective_resistance
+ network_polarisation
+ network_variance
+ network_variance_numba
+ network_covariance
+ network_covariance_numba
+
-Functions to download real-world datasets
+.. _ref_modularity:
+
+:mod:`netneurotools.modularity` - Calculating network modularity
+----------------------------------------------------------------
+
+.. automodule:: netneurotools.modularity
+ :no-members:
+ :no-inherited-members:
+
+.. currentmodule:: netneurotools.modularity
.. autosummary::
:template: function.rst
:toctree: generated/
- fetch_connectome
- fetch_mirchi2018
- fetch_vazquez_rodriguez2019
+ match_cluster_labels
+ match_assignments
+ reorder_assignments
+ find_consensus
+ consensus_modularity
+ zrand
+ get_modularity
+ get_modularity_z
+ get_modularity_sig
+
+
+.. _ref_stats:
-Functions to generate (pseudo-random) datasets
+:mod:`netneurotools.stats` - General statistics functions
+---------------------------------------------------------
+
+.. automodule:: netneurotools.stats
+ :no-members:
+ :no-inherited-members:
+
+.. currentmodule:: netneurotools.stats
+
+Correlations
.. autosummary::
:template: function.rst
:toctree: generated/
- make_correlated_xy
+ efficient_pearsonr
+ weighted_pearsonr
+ make_correlated_xy
-.. _ref_freesurfer:
+Permutation tests
-:mod:`netneurotools.freesurfer` - FreeSurfer compatibility functions
---------------------------------------------------------------------
+.. autosummary::
+ :template: function.rst
+ :toctree: generated/
-.. automodule:: netneurotools.freesurfer
- :no-members:
- :no-inherited-members:
+ permtest_1samp
+ permtest_rel
+ permtest_pearsonr
-.. currentmodule:: netneurotools.freesurfer
+Regressions
.. autosummary::
:template: function.rst
:toctree: generated/
- apply_prob_atlas
- find_parcel_centroids
- parcels_to_vertices
- vertices_to_parcels
- spin_data
- spin_parcels
+ residualize
+ get_dominance_stats
+
-.. _ref_civet:
+.. _ref_spatial:
-:mod:`netneurotools.civet` - CIVET compatibility functions
-----------------------------------------------------------
+:mod:`netneurotools.spatial` - Spatial statistics
+-------------------------------------------------
-.. automodule:: netneurotools.civet
+.. automodule:: netneurotools.spatial
:no-members:
:no-inherited-members:
-.. currentmodule:: netneurotools.civet
+.. currentmodule:: netneurotools.spatial
+
+Calculating spatial statistics
.. autosummary::
:template: function.rst
:toctree: generated/
- read_civet
- civet_to_freesurfer
+ morans_i
+ local_morans_i
-.. _ref_utils:
-:mod:`netneurotools.utils` - Miscellaneous, grab bag utilities
---------------------------------------------------------------
+.. _ref_interface:
-.. automodule:: netneurotools.utils
+:mod:`netneurotools.interface` - Interface for external tools
+-------------------------------------------------------------
+
+.. automodule:: netneurotools.interface
:no-members:
:no-inherited-members:
-.. currentmodule:: netneurotools.utils
+.. currentmodule:: netneurotools.interface
.. autosummary::
:template: function.rst
:toctree: generated/
- run
- add_constant
- get_triu
- get_centroids
-.. _ref_colors:
+.. _ref_experimental:
-:mod:`netneurotools.colors` - Useful colormaps
---------------------------------------------------------------
+:mod:`netneurotools.experimental` - Functions in alpha stage
+------------------------------------------------------------
-.. automodule:: netneurotools.colors
+.. automodule:: netneurotools.experimental
:no-members:
:no-inherited-members:
-.. currentmodule:: netneurotools.colors
+.. currentmodule:: netneurotools.experimental
.. autosummary::
:template: function.rst
:toctree: generated/
- available_cmaps
+
diff --git a/docs/conf.py b/docs/conf.py
index 37ee2e5..a5890a1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -39,7 +39,8 @@
'sphinx.ext.mathjax',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
- 'sphinx_gallery.gen_gallery'
+ 'sphinx_gallery.gen_gallery',
+ 'sphinx_design'
]
# Generate the API documentation when building
@@ -47,6 +48,7 @@
autodoc_default_options = {'members': True, 'inherited-members': True}
numpydoc_show_class_members = False
autoclass_content = "class"
+napoleon_use_param = False
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
diff --git a/docs/installation.rst b/docs/installation.rst
index 79f91de..0e4f129 100644
--- a/docs/installation.rst
+++ b/docs/installation.rst
@@ -24,15 +24,47 @@ Alternatively, you can install ``netneurotools`` directly from PyPi with:
pip install netneurotools
+
Optional installation for surface plotting
------------------------------------------
-In order to use surface plotting functionality like
-:py:func:`netneurotools.plotting.plot_fsaverage`, you will need a working
-``vtk``/``mayavi``/``pysurfer`` installation. These can generally be installed
-with the following command:
+Pyvista
+~~~~~~~
+
+Pyvista is the new plotting library used in the package. Installing it will allow you to use functions like
+
+- :py:func:`netneurotools.plotting.pv_plot_surface`
+
+You will need a working ``pyvista`` installation.
+Generally, we recommend using a clean conda environment and installing Pyvista with the following commands:
+
+.. code-block:: bash
+
+ conda create -n plotting python=3.12
+ conda activate plotting
+ conda install -c conda-forge pyvista
+ # if you are using Jupyter notebooks
+ conda install -c conda-forge jupyterlab trame trame-vtk trame-vuetify trame-jupyter-extension
+
+If you run into any issues, please refer to the
+`detailed installation guide `_.
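+
+To quickly confirm that the installation works, a minimal sketch (assuming a desktop
+session with a display, or a configured Jupyter backend) is to render a simple mesh:
+
+.. code-block:: python
+
+    import pyvista as pv
+
+    # render a simple sphere; a plotting window (or inline plot) should appear
+    sphere = pv.Sphere()
+    sphere.plot()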
-.. code-block: bash
+
+Pysurfer (deprecated)
+~~~~~~~~~~~~~~~~~~~~~
+
+Pysurfer is the old plotting library used in the package. It is now deprecated in favor of Pyvista.
+Installing it will allow you to use functions like
+
+- :py:func:`netneurotools.plotting.plot_fsaverage`
+- :py:func:`netneurotools.plotting.plot_fslr`
+- :py:func:`netneurotools.plotting.plot_conte69`
+- :py:func:`netneurotools.plotting.plot_fsvertex`
+
+You will need a working ``vtk``/``mayavi``/``pysurfer`` installation.
+These can generally be installed with the following command:
+
+.. code-block:: bash
pip install vtk mayavi pysurfer
@@ -94,8 +126,9 @@ installation, there is generally no need to follow these instructions!
- Install from source
``pip install git+https://github.com/netneurolab/netneurotools.git``
-Troubleshooting
-~~~~~~~~~~~~~~~
+
+Here are some common issues and their solutions:
+
- Error related to ``from tvtk.vtk_module import VTK_MAJOR_VERSION``
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 218744e..2a4b94f 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,6 +1,7 @@
-r ../requirements.txt
-sphinx>=2.0, <7.0.0
+sphinx>=2.0
sphinx_rtd_theme
sphinx-gallery
pillow
pytest-doctestplus
+sphinx_design
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
deleted file mode 100644
index 5984c5b..0000000
--- a/environment.yml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: netneurotools
-channels:
- - defaults
- - conda-forge
-dependencies:
- - python>=3.6
- - matplotlib
- - nibabel
- - nilearn
- - "numpy>=1.16"
- - pip
- - scikit-learn
- - "scipy>=1.4.0"
- - pip:
- - git+https://github.com/aestrivex/bctpy.git#egg=bctpy
diff --git a/examples/plot_consensus_clustering.py b/examples/plot_consensus_clustering.py
index 13faab1..69c337f 100644
--- a/examples/plot_consensus_clustering.py
+++ b/examples/plot_consensus_clustering.py
@@ -81,9 +81,9 @@
# We'll provide these different assignments to our consensus-finding algorithm
# which will generate one final community assignment vector:
-from netneurotools import cluster
+from netneurotools import modularity
-consensus = cluster.find_consensus(np.column_stack(ci), seed=1234)
+consensus = modularity.find_consensus(np.column_stack(ci), seed=1234)
plotting.plot_mod_heatmap(corr, consensus, cmap='viridis')
###############################################################################
diff --git a/examples/plot_perm_pvals.py b/examples/plot_perm_pvals.py
index 702d28a..6f87056 100644
--- a/examples/plot_perm_pvals.py
+++ b/examples/plot_perm_pvals.py
@@ -28,8 +28,8 @@
# We can use ``scipy.stats`` for a standard parametric test to assess whether
# the array is different from zero:
-from scipy import stats
-print(stats.ttest_1samp(rvs, 0.0))
+import scipy.stats as sstats
+print(sstats.ttest_1samp(rvs, 0.0))
###############################################################################
# And can do the same thing with permutations using ``netneurotools.stats``:
@@ -88,7 +88,7 @@
# These two arrays shouldn't be meaningfully different, and we can test that
# with a standard parametric test:
-print(stats.ttest_rel(rvs1, rvs2))
+print(sstats.ttest_rel(rvs1, rvs2))
###############################################################################
# Or with a non-parametric permutation test:
@@ -114,13 +114,12 @@
#
# First, we'll generate two correlated variables:
-from netneurotools import datasets
-x, y = datasets.make_correlated_xy(corr=0.2, size=100)
+x, y = nnstats.make_correlated_xy(corr=0.2, size=100)
###############################################################################
# We can generate the Pearson correlation with the standard parametric p-value:
-print(stats.pearsonr(x, y))
+print(sstats.pearsonr(x, y))
###############################################################################
# Or use permutation testing to derive the p-value:
@@ -132,7 +131,7 @@
# :func:`~.permtest_rel` apply here, so you can provide same-sized arrays and
# correlations will only be calculated for paired columns:
-a, b = datasets.make_correlated_xy(corr=0.9, size=100)
+a, b = nnstats.make_correlated_xy(corr=0.9, size=100)
arr1, arr2 = np.column_stack([x, a]), np.column_stack([y, b])
print(nnstats.permtest_pearsonr(arr1, arr2))
diff --git a/examples/plot_mirchi_2018.py b/examples/wip_plot_mirchi_2018.py
similarity index 100%
rename from examples/plot_mirchi_2018.py
rename to examples/wip_plot_mirchi_2018.py
diff --git a/netneurotools/__init__.py b/netneurotools/__init__.py
index 8163cc1..2ca326a 100644
--- a/netneurotools/__init__.py
+++ b/netneurotools/__init__.py
@@ -1,6 +1,7 @@
-__all__ = [
- '__version__',
-]
-
-from . import _version
-__version__ = _version.get_versions()['version']
+
+from . import _version
+__version__ = _version.get_versions()['version']
+
+__all__ = [
+ '__version__'
+]
diff --git a/netneurotools/civet.py b/netneurotools/civet.py
deleted file mode 100644
index 5c247c0..0000000
--- a/netneurotools/civet.py
+++ /dev/null
@@ -1,104 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Functions for working with CIVET data (ugh)."""
-
-import nibabel as nib
-import numpy as np
-from scipy.interpolate import griddata
-
-from .datasets import fetch_civet, fetch_fsaverage
-
-_MNI305to152 = np.array([[0.9975, -0.0073, 0.0176, -0.0429],
- [0.0146, 1.0009, -0.0024, 1.5496],
- [-0.0130, -0.0093, 0.9971, 1.1840],
- [0.0000, 0.0000, 0.0000, 1.0000]])
-
-
-def read_civet(fname):
- """
- Read a CIVET-style .obj geometry file.
-
- Parameters
- ----------
- fname : str or os.PathLike
- Filepath to .obj file
-
- Returns
- -------
- vertices : (N, 3)
- triangles : (T, 3)
- """
- k, polygons = 0, []
- with open(fname, 'r') as src:
- n_vert = int(src.readline().split()[6])
- vertices = np.zeros((n_vert, 3))
- for i, line in enumerate(src):
- if i < n_vert:
- vertices[i] = [float(i) for i in line.split()]
- elif i >= (2 * n_vert) + 5:
- if not line.strip():
- k = 1
- elif k == 1:
- polygons.extend([int(i) for i in line.split()])
-
- triangles = np.reshape(np.asarray(polygons), (-1, 3))
-
- return vertices, triangles
-
-
-def civet_to_freesurfer(brainmap, surface='mid', version='v1',
- freesurfer='fsaverage6', method='nearest',
- data_dir=None):
- """
- Project `brainmap` in CIVET space to `freesurfer` fsaverage space.
-
- Uses a nearest-neighbor projection based on the geometry of the vertices
-
- Parameters
- ----------
- brainmap : array_like
- CIVET brainmap to be converted to freesurfer space
- surface : {'white', 'mid'}, optional
- Which CIVET surface to use for geometry of `brainmap`. Default: 'mid'
- version : {'v1', 'v2'}, optional
- Which CIVET version to use for geometry of `brainmap`. Default: 'v1'
- freesurfer : str, optional
- Which version of FreeSurfer space to project data to. Must be one of
- {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'}.
- Default: 'fsaverage6'
- method : {'nearest', 'linear'}, optional
- What method of interpolation to use when projecting the data between
- surfaces. Default: 'nearest'
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
-
- Returns
- -------
- data : np.ndarray
- Provided `brainmap` mapped to FreeSurfer
- """
- brainmap = np.asarray(brainmap)
- densities = (81924, 327684)
- n_vert = brainmap.shape[0]
- if n_vert not in densities:
- raise ValueError('Unable to interpret `brainmap` space; provided '
- 'array must have length in {}. Received: {}'
- .format(densities, n_vert))
-
- n_vert = n_vert // 2
- icbm = fetch_civet(density='41k' if n_vert == 40962 else '164k',
- version=version, data_dir=data_dir, verbose=0)[surface]
- fsavg = fetch_fsaverage(version=freesurfer, data_dir=data_dir, verbose=0)
- fsavg = fsavg['pial' if surface == 'mid' else surface]
-
- data = []
- for n, hemi in enumerate(('lh', 'rh')):
- sl = slice(n_vert * n, n_vert * (n + 1))
- vert_cv, _ = read_civet(getattr(icbm, hemi))
- vert_fs = nib.affines.apply_affine(
- _MNI305to152, nib.freesurfer.read_geometry(getattr(fsavg, hemi))[0]
- )
- data.append(griddata(vert_cv, brainmap[sl], vert_fs, method=method))
-
- return np.hstack(data)
diff --git a/netneurotools/data/osf.json b/netneurotools/data/osf.json
deleted file mode 100644
index 289ecbc..0000000
--- a/netneurotools/data/osf.json
+++ /dev/null
@@ -1,405 +0,0 @@
-{
- "atl-cammoun2012": {
- "gcs": {
- "url": [
- "mb37e",
- "5ce6bb4423fec40017e82c5e"
- ],
- "md5": "266c4520af768e766328fb8e6648005d"
- },
- "fsaverage": {
- "url": [
- "mb37e",
- "5ce6c30523fec40017e83439"
- ],
- "md5": "2a19eb4744c0ce6c243f721bd43ecff0"
- },
- "fsaverage5": {
- "url": [
- "mb37e",
- "5e189a1c57341903868036dd"
- ],
- "md5": "2afb22e1887d47f1ca81c340fff7692b"
- },
- "fsaverage6": {
- "url": [
- "mb37e",
- "5e189a1b5734190380804072"
- ],
- "md5": "1df743bff13316f67bd41d13ec691c97"
- },
- "MNI152NLin2009aSym": {
- "url": [
- "mb37e",
- "5e2f4bf0e71ef800301880c2"
- ],
- "md5": "9da30bad22d732aa5f00a6d178d087c4"
- },
- "fslr32k": {
- "url": [
- "mb37e",
- "5e2f4bf1e71ef80027189c56"
- ],
- "md5": "a5177319d5e0b8825a91d503ded1a59e"
- }
- },
- "atl-pauli2018": [
- {
- "url": [
- "jkzwp",
- "5b11fa3364f25a001973dce0"
- ],
- "md5": "62dd6ff405d3a8b89ee188cafa3a7f6a",
- "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz"
- },
- {
- "url": [
- "jkzwp",
- "5b11fa2ff1f288000e625a7f"
- ],
- "md5": "5a5b6246921be08456304875447c68ed",
- "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz"
- },
- {
- "url": [
- "mb37e",
- "5c93b4f034062c001b1ef50d"
- ],
- "md5": "390a693abeb1a583151f30aa8798bab5",
- "name": "atl-pauli2018/atl-Pauli2018_space-MNI152NLin2009cAsym_info.csv"
- }
- ],
- "tpl-conte69": {
- "url": [
- "fvuh8",
- "5b198ec5ec24e20011b48548"
- ],
- "md5": "bd944e3f9f343e0e51e562b440960529"
- },
- "tpl-yerkes19": {
- "url": [
- "mb37e",
- "60ae93d504e91a005f1761ab"
- ],
- "md5": "9ee4f1605fb690a85b04b61549d62925"
- },
- "tpl-fsaverage": {
- "fsaverage": {
- "url": [
- "mb37e",
- "5c82830a1d73810018bdacea"
- ],
- "md5": "1e82c52ed21d06d4e6e7341c725c5262"
- },
- "fsaverage3": {
- "url": [
- "mb37e",
- "5d9f83b6f6b03e000e1ba285"
- ],
- "md5": "b4182495d341364e3f7c5b86284d8d20"
- },
- "fsaverage4": {
- "url": [
- "mb37e",
- "5d9f83b7fcf91f00111c7473"
- ],
- "md5": "5a481421dc1286c7bd9b8a47db5fad0b"
- },
- "fsaverage5": {
- "url": [
- "mb37e",
- "5d9f83b6f6b03e00101c932f"
- ],
- "md5": "cc75f7290c03970a8b8a06dfc215e925"
- },
- "fsaverage6": {
- "url": [
- "mb37e",
- "5d9f83b7a7bc73000cea05f1"
- ],
- "md5": "8f75b95c0e47ae935d10745baefa2c49"
- }
- },
- "tpl-civet": {
- "v1": {
- "civet41k": {
- "url": [
- "mb37e",
- "601daffd84ecf800fe031868"
- ],
- "md5": "b27219c876464992e1b61da1c60d8d6e"
- }
- },
- "v2": {
- "civet41k": {
- "url": [
- "mb37e",
- "601dafe77ad0a80119d9483c"
- ],
- "md5": "a47b015e471c6a800d236f107fda5b4a"
- },
- "civet164k": {
- "url": [
- "mb37e",
- "601dafe87ad0a8011ad94938"
- ],
- "md5": "02537ea65d5366acd8de729022a34bab"
- }
- }
- },
- "ds-connectomes": {
- "celegans": {
- "url": [
- "mb37e",
- "5d9b8e4aa7bc73000be65508"
- ],
- "md5": "f35cd893bc1aff4e8184a528fcda14b9",
- "keys": [
- "conn",
- "dist",
- "labels"
- ]
- },
- "drosophila": {
- "url": [
- "mb37e",
- "5d9b8e4aa7bc73000ce65d00"
- ],
- "md5": "6a67a4fc1b4f35b72c42cca4d0827249",
- "keys": [
- "conn",
- "coords",
- "labels",
- "networks"
- ]
- },
- "human_func_scale033": {
- "url": [
- "mb37e",
- "5d9b8e4afcf91f000f18f57b"
- ],
- "md5": "1988ab427d9bc0de075bbe600ce0a27f",
- "keys": [
- "conn",
- "coords",
- "labels"
- ]
- },
- "human_func_scale060": {
- "url": [
- "mb37e",
- "5d9b8e4aa7bc73000de67117"
- ],
- "md5": "4191f5a2b0c5063dcba9935ea0ef0bfe",
- "keys": [
- "conn",
- "coords",
- "labels"
- ]
- },
- "human_func_scale125": {
- "url": [
- "mb37e",
- "5d9b8e4b26eb50000e78c987"
- ],
- "md5": "533e11cf9fea67d536648c9ef939a5f5",
- "keys": [
- "conn",
- "coords",
- "labels"
- ]
- },
- "human_func_scale250": {
- "url": [
- "mb37e",
- "5d9b8e4efcf91f0012190ba1"
- ],
- "md5": "4abc7324c2a9ae04ef6cf5555149b3f4",
- "keys": [
- "conn",
- "coords",
- "labels"
- ]
- },
- "human_func_scale500": {
- "url": [
- "mb37e",
- "5d9b8e4ff6b03e000d18b5a1"
- ],
- "md5": "637c6057476b2508f15f244d528e156d",
- "keys": [
- "conn",
- "coords",
- "labels"
- ]
- },
- "human_struct_scale033": {
- "url": [
- "mb37e",
- "5d9b8e4f26eb50000e78c993"
- ],
- "md5": "27a2101f2f04e0fc8de09a8248793235",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels"
- ]
- },
- "human_struct_scale060": {
- "url": [
- "mb37e",
- "5d9b8e4da7bc73000be6550e"
- ],
- "md5": "9289265ab1bd0fa18611eeaf1afce745",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels"
- ]
- },
- "human_struct_scale125": {
- "url": [
- "mb37e",
- "5d9b8e50f6b03e000e18aa37"
- ],
- "md5": "07e60b141809babe8c2645d93cd24984",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels"
- ]
- },
- "human_struct_scale250": {
- "url": [
- "mb37e",
- "5d9b8e51fcf91f001118fdc2"
- ],
- "md5": "56f9ca8b4ecc63ef9aaf64a606755c09",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels"
- ]
- },
- "human_struct_scale500": {
- "url": [
- "mb37e",
- "5d9b8e51a7bc73000ee65769"
- ],
- "md5": "94724e0446f8cb06207a4521ba1df20f",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels"
- ]
- },
- "macaque_markov": {
- "url": [
- "mb37e",
- "5d9b8e56a7bc73000ce65d11"
- ],
- "md5": "5ce43182afc9c4f779db2c0306afb202",
- "keys": [
- "conn",
- "dist",
- "labels"
- ]
- },
- "macaque_modha": {
- "url": [
- "mb37e",
- "5d9b8e5626eb50000d78abd0"
- ],
- "md5": "f467c62b2670feaf75c93d90d5ed5de6",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels"
- ]
- },
- "mouse": {
- "url": [
- "mb37e",
- "5d9b8e5626eb50000e78c9a0"
- ],
- "md5": "dba5cbbb9e72c1cacda945086d77a125",
- "keys": [
- "conn",
- "coords",
- "dist",
- "labels",
- "acronyms"
- ]
- },
- "rat": {
- "url": [
- "mb37e",
- "5d9b8e56f6b03e000f18d06f"
- ],
- "md5": "9e1f12ce4fa42082a76d62f89670f5d0",
- "keys": [
- "conn",
- "labels"
- ]
- }
- },
- "ds-vazquez_rodriguez2019": {
- "url": [
- "mb37e",
- "5d9f5aa4f6b03e000e1b819e"
- ],
- "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3"
- },
- "atl-schaefer2018": {
- "fsaverage": {
- "url": [
- "mb37e",
- "5dbc8d7dcfc96c000dc3581c"
- ],
- "md5": "74dfe4237efaccabf057897c49e8af94"
- },
- "fsaverage5": {
- "url": [
- "mb37e",
- "5dbc8d7daf84c3000eebffb2"
- ],
- "md5": "45a8c784f1979eb33a119bdab912a51f"
- },
- "fsaverage6": {
- "url": [
- "mb37e",
- "5dbc8d7bcfc96c000ec6dca2"
- ],
- "md5": "8738daccab4648c3e891a1c8d3a9ec1f"
- },
- "fslr32k": {
- "url": [
- "mb37e",
- "5e3086e4af75930094bdd507"
- ],
- "md5": "d8378f33107ed5d98c27e8070ebb5aa2"
- }
- },
- "atl-mmpall": {
- "fslr32k": {
- "url": [
- "mb37e",
- "6047bac259e910009b83114f"
- ],
- "md5": "fd641742685a239d9c3f60e19a280ca2"
- }
- },
- "atl-voneconomo_koskinas": {
- "url": [
- "mb37e",
- "5ed80005fabc45000d639900"
- ],
- "md5": "67085e2577d21dc3a742f4fcde6e3b18"
- }
-}
diff --git a/netneurotools/datasets/__init__.py b/netneurotools/datasets/__init__.py
index cfe50c1..b14674e 100644
--- a/netneurotools/datasets/__init__.py
+++ b/netneurotools/datasets/__init__.py
@@ -1,16 +1,61 @@
-"""Functions for fetching and generating datasets."""
+"""Functions for handling datasets."""
+
+
+from .fetch_template import (
+ fetch_fsaverage, fetch_fsaverage_curated,
+ fetch_hcp_standards, fetch_fslr_curated,
+ fetch_civet, fetch_civet_curated,
+ fetch_conte69, fetch_yerkes19
+)
+
+
+from .fetch_atlas import (
+ # cortical
+ fetch_cammoun2012, fetch_schaefer2018, fetch_mmpall,
+ # subcortical
+ fetch_pauli2018, fetch_ye2020,
+ # annotation
+ fetch_voneconomo
+)
+
+
+from .fetch_project import (
+ # old projects
+ fetch_vazquez_rodriguez2019, fetch_mirchi2018,
+ # new projects
+ fetch_hansen_manynetworks, fetch_hansen_receptors,
+ fetch_hansen_genescognition, fetch_hansen_brainstemfc,
+ fetch_shafiei_megfmrimapping, fetch_shafiei_megdynamics,
+ fetch_suarez_mami,
+ # example data
+ fetch_famous_gmat,
+ # resources
+ fetch_neurosynth
+)
+
+from .datasets_utils import (
+ FREESURFER_IGNORE, _get_freesurfer_subjid
+)
+
__all__ = [
- 'fetch_cammoun2012', 'fetch_pauli2018', 'fetch_fsaverage', 'fetch_conte69',
- 'fetch_connectome', 'available_connectomes', 'fetch_vazquez_rodriguez2019',
- 'fetch_mirchi2018', 'make_correlated_xy', 'fetch_schaefer2018',
- 'fetch_hcp_standards', 'fetch_voneconomo', 'fetch_mmpall', 'fetch_civet'
+ # fetch_template
+ 'fetch_fsaverage', 'fetch_fsaverage_curated',
+ 'fetch_hcp_standards', 'fetch_fslr_curated',
+ 'fetch_civet', 'fetch_civet_curated',
+ 'fetch_conte69', 'fetch_yerkes19',
+ # fetch_atlas
+ 'fetch_cammoun2012', 'fetch_schaefer2018', 'fetch_mmpall',
+ 'fetch_pauli2018', 'fetch_ye2020',
+ 'fetch_voneconomo',
+ # fetch_project
+ 'fetch_vazquez_rodriguez2019', 'fetch_mirchi2018',
+ 'fetch_hansen_manynetworks', 'fetch_hansen_receptors',
+ 'fetch_hansen_genescognition', 'fetch_hansen_brainstemfc',
+ 'fetch_shafiei_megfmrimapping', 'fetch_shafiei_megdynamics',
+ 'fetch_suarez_mami',
+ 'fetch_famous_gmat',
+ 'fetch_neurosynth',
+ # datasets_utils
+ 'FREESURFER_IGNORE', '_get_freesurfer_subjid'
]
-
-from .fetchers import (fetch_cammoun2012, fetch_pauli2018, fetch_fsaverage,
- fetch_conte69, fetch_yerkes19, fetch_connectome,
- available_connectomes, fetch_vazquez_rodriguez2019,
- fetch_schaefer2018, fetch_hcp_standards,
- fetch_voneconomo, fetch_mmpall, fetch_civet)
-from .generators import (make_correlated_xy)
-from .mirchi import (fetch_mirchi2018)
diff --git a/netneurotools/datasets/mirchi.py b/netneurotools/datasets/_mirchi2018.py
similarity index 70%
rename from netneurotools/datasets/mirchi.py
rename to netneurotools/datasets/_mirchi2018.py
index 190ae63..60e0c09 100644
--- a/netneurotools/datasets/mirchi.py
+++ b/netneurotools/datasets/_mirchi2018.py
@@ -1,13 +1,9 @@
-# -*- coding: utf-8 -*-
"""Code for re-generating results from Mirchi et al., 2018 (SCAN)."""
-import os
from urllib.request import HTTPError, urlopen
import numpy as np
-from .utils import _get_data_dir
-
TIMESERIES = ("https://s3.amazonaws.com/openneuro/ds000031/ds000031_R1.0.2"
"/uncompressed/derivatives/sub-01/ses-{0}/"
@@ -71,7 +67,7 @@
}
-def _get_fc(data_dir=None, resume=True, verbose=1):
+def _get_fc(verbose=1):
"""
Get functional connections from MyConnectome parcelled time series data.
@@ -99,7 +95,7 @@ def _get_fc(data_dir=None, resume=True, verbose=1):
return np.vstack(fc)
-def _get_panas(data_dir=None, resume=True, verbose=1):
+def _get_panas():
"""
Get PANAS subscales from MyConnectome behavioral data.
@@ -132,47 +128,3 @@ def _get_panas(data_dir=None, resume=True, verbose=1):
measures[subscale] = measure.sum(axis=-1)
return measures
-
-
-def fetch_mirchi2018(data_dir=None, resume=True, verbose=1):
- """
- Download (and creates) dataset for replicating Mirchi et al., 2018, SCAN.
-
- Parameters
- ----------
- data_dir : str, optional
- Directory to check for existing data files (if they exist) or to save
- generated data files. Files should be named mirchi2018_fc.npy and
- mirchi2018_panas.csv for the functional connectivity and behavioral
- data, respectively.
-
- Returns
- -------
- X : (73, 198135) numpy.ndarray
- Functional connections from MyConnectome rsfMRI time series data
- Y : (73, 13) numpy.ndarray
- PANAS subscales from MyConnectome behavioral data
- """
- data_dir = os.path.join(_get_data_dir(data_dir=data_dir), 'ds-mirchi2018')
- os.makedirs(data_dir, exist_ok=True)
-
- X_fname = os.path.join(data_dir, 'myconnectome_fc.npy')
- Y_fname = os.path.join(data_dir, 'myconnectome_panas.csv')
-
- if not os.path.exists(X_fname):
- X = _get_fc(data_dir=data_dir, resume=resume, verbose=verbose)
- np.save(X_fname, X, allow_pickle=False)
- else:
- X = np.load(X_fname, allow_pickle=False)
-
- if not os.path.exists(Y_fname):
- Y = _get_panas(data_dir=data_dir, resume=resume, verbose=verbose)
- np.savetxt(Y_fname, np.column_stack(list(Y.values())),
- header=','.join(Y.keys()), delimiter=',', fmt='%i')
- # convert dictionary to structured array before returning
- Y = np.array([tuple(row) for row in np.column_stack(list(Y.values()))],
- dtype=dict(names=list(Y.keys()), formats=['i8'] * len(Y)))
- else:
- Y = np.genfromtxt(Y_fname, delimiter=',', names=True, dtype=int)
-
- return X, Y
diff --git a/netneurotools/datasets/datasets.json b/netneurotools/datasets/datasets.json
new file mode 100644
index 0000000..c6011a1
--- /dev/null
+++ b/netneurotools/datasets/datasets.json
@@ -0,0 +1,470 @@
+{
+ "atl-cammoun2012": {
+ "gcs": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67326ef9ee12e85a3662f7c4"
+ ],
+ "md5": "a9a0779258c170805e4690394802a707",
+ "uncompress": true,
+ "rel-path": "atl-cammoun2012/gcs"
+ },
+ "fsaverage": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67326ef5c41abfb7cd0ddf1d"
+ ],
+ "md5": "a67cad69c51749240d4b1b0100f429f5",
+ "uncompress": true,
+ "rel-path": "atl-cammoun2012/fsaverage"
+ },
+ "fsaverage5": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67326ef5b1dd1a8ca6b2ef68"
+ ],
+ "md5": "2648b4d14461128c0889d9b9ad8ec349",
+ "uncompress": true,
+ "rel-path": "atl-cammoun2012/fsaverage5"
+ },
+ "fsaverage6": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67326ef766af3db75e62f62f"
+ ],
+ "md5": "a810eed6e19a6ccbf01253312da1f291",
+ "uncompress": true,
+ "rel-path": "atl-cammoun2012/fsaverage6"
+ },
+ "MNI152NLin2009aSym": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67326efaee12e85a3662f7c6"
+ ],
+ "md5": "7cc281d6916baf78bb53b12806d28454",
+ "uncompress": true,
+ "rel-path": "atl-cammoun2012/MNI152NLin2009aSym"
+ },
+ "fslr32k": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67326ef7bbbfa7f6feb2f071"
+ ],
+ "md5": "679575fbc83474730fd77fa17a522ed4",
+ "uncompress": true,
+ "rel-path": "atl-cammoun2012/fslr32k"
+ }
+ },
+ "atl-pauli2018": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "6732834503ad88c31bb2f1c7"
+ ],
+ "md5": "214a9c4381919c00af1f0b97ea9f8ed7",
+ "uncompress": true,
+ "rel-path": "atl-pauli2018"
+ },
+ "tpl-conte69": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327010ad7d0b0b8c4d6d92"
+ ],
+ "md5": "bd944e3f9f343e0e51e562b440960529",
+ "uncompress": true,
+ "rel-path": "tpl-conte69"
+
+ },
+ "tpl-yerkes19": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327067f8f06756c60ddedd"
+ ],
+ "md5": "9ee4f1605fb690a85b04b61549d62925",
+ "uncompress": true,
+ "rel-path": "tpl-yerkes19"
+ },
+ "tpl-fsaverage": {
+ "fsaverage": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327113dab4e926760ddf37"
+ ],
+ "md5": "a92a40fc0db1dfd88159cbdf517a25da",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage/fsaverage"
+ },
+ "fsaverage3": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673271131ad23303434d6f68"
+ ],
+ "md5": "93c6d815aca12da38233c8b70a6e31e1",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage/fsaverage3"
+ },
+ "fsaverage4": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327115ad7d0b0b8c4d6e2c"
+ ],
+ "md5": "7f7322d97d2e7bd8fb17dd09803f0e07",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage/fsaverage4"
+ },
+ "fsaverage5": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327116b8b930019c62fd6b"
+ ],
+ "md5": "b79f2583917e2a5f79a4768e2b6e27e6",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage/fsaverage5"
+ },
+ "fsaverage6": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327118c1e3bbde16b2f05f"
+ ],
+ "md5": "a4695232aa1a1bad644f9c5dbf52eca5",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage/fsaverage6"
+ }
+ },
+ "tpl-fsaverage_curated": {
+ "fsaverage": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684b2cb2a5e01e968c918"
+ ],
+ "md5": "f4969a33b8cd88e46821ef581e492282",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage_curated/fsaverage"
+ },
+ "fsaverage6": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684aecb2a5e01fc68b7e1"
+ ],
+ "md5": "0cc48e9d5d5bb0216502888c954805fd",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage_curated/fsaverage6"
+ },
+ "fsaverage5": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684ab9096b7021b63cf6b"
+ ],
+ "md5": "c61384c271ee2e6b5449222281137414",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage_curated/fsaverage5"
+ },
+ "fsaverage4": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684ac9096b7021c63d9c5"
+ ],
+ "md5": "019dbf849671f5bed5a42476e5359997",
+ "uncompress": true,
+ "rel-path": "tpl-fsaverage_curated/fsaverage4"
+ }
+ },
+ "tpl-hcp_standards": {
+ "standard_mesh_atlases": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "6732749850aeaa53a10ddf2c"
+ ],
+ "md5": "c13bf257f0a7dea7955a83577cfe6659",
+ "uncompress": true,
+ "rel-path": "tpl-hcp_standards/standard_mesh_atlases"
+ }
+ },
+ "tpl-fslr_curated": {
+ "fslr164k": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684b9cb2a5e01fd68c115"
+ ],
+ "md5": "e86a2e9d998e07d621e8f31f0205bf3e",
+ "uncompress": true,
+ "rel-path": "tpl-fslr_curated/fslr164k"
+ },
+ "fslr32k": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684b53a6df1021bd4df2d"
+ ],
+ "md5": "7932b4418f63d28935b5adf67150b16f",
+ "uncompress": true,
+ "rel-path": "tpl-fslr_curated/fslr32k"
+ },
+ "fslr8k": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "63eadef9b3fed60444e3434f"
+ ],
+ "md5": "a1226aee262475e23d2cc37b84ef261d",
+ "uncompress": true,
+ "rel-path": "tpl-fslr_curated/fslr8k"
+ },
+ "fslr4k": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "63eadf00cb544b03fa9e6f00"
+ ],
+ "md5": "72b7b17d389a04774ca6fd4ca28b4087",
+ "uncompress": true,
+ "rel-path": "tpl-fslr_curated/fslr4k"
+ }
+ },
+ "tpl-civet": {
+ "v1": {
+ "civet41k": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673272bedab4e926760ddf70"
+ ],
+ "md5": "57feb3db87e81508648a73e72ac427d6",
+ "uncompress": true,
+ "rel-path": "tpl-civet/v1/civet41k"
+ }
+ },
+ "v2": {
+ "civet41k": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673272f252f6f10b568ea5ae"
+ ],
+ "md5": "a91a092c4a92e95f817230863cf05a2e",
+ "uncompress": true,
+ "rel-path": "tpl-civet/v2/civet41k"
+ },
+ "civet164k": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673272f266af3db75e62fb33"
+ ],
+ "md5": "2c26ea033a905253b734889d08e1efdd",
+ "uncompress": true,
+ "rel-path": "tpl-civet/v2/civet164k"
+ }
+ }
+ },
+ "tpl-civet_curated": {
+ "v2": {
+ "civet41k": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684a69096b7021c63d9a0"
+ ],
+ "md5": "b425aa4dd5aa9c3c764b192d2a799123",
+ "uncompress": true,
+ "rel-path": "tpl-civet_curated/v2/civet41k"
+ },
+ "civet164k": {
+ "url-type": "osf",
+ "url": [
+ "4mw3a",
+ "60b684ab3a6df1020dd50706"
+ ],
+ "md5": "90d8e99304e1ec3b85d1092ff9ac5b8f",
+ "uncompress": true,
+ "rel-path": "tpl-civet_curated/v2/civet164k"
+ }
+ }
+ },
+ "ds-famous_gmat": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "67327344a6dba6adff62f48c"
+ ],
+ "md5": "b803de1058579881a759f475704e9f35",
+ "uncompress": true,
+ "rel-path": "ds-famous_gmat"
+ },
+ "ds-vazquez_rodriguez2019": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "6732736066af3db75e62fb5b"
+ ],
+ "md5": "c710365a2cc5cddb8a2fbb5f6ae421a3",
+ "uncompress": true,
+ "rel-path": "ds-vazquez_rodriguez2019"
+ },
+ "atl-schaefer2018": {
+ "fsaverage": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673273c54b79ea9a5062f4b3"
+ ],
+ "md5": "de53b3faaa076442ae8ddb5ef62d79e8",
+ "uncompress": true,
+ "rel-path": "atl-schaefer2018/fsaverage"
+ },
+ "fsaverage5": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673273c550aeaa53a10dded5"
+ ],
+ "md5": "838dd438e9d10d0dda055b32d02a63f9",
+ "uncompress": true,
+ "rel-path": "atl-schaefer2018/fsaverage5"
+ },
+ "fsaverage6": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673273c72e830a5cbd0dde84"
+ ],
+ "md5": "06461e983ba10c64621cae6250c0c8cd",
+ "uncompress": true,
+ "rel-path": "atl-schaefer2018/fsaverage6"
+ },
+ "fslr32k": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "673273c790023be9c44d6b5a"
+ ],
+ "md5": "770401d8fdcec6ca05f797e77230338e",
+ "uncompress": true,
+ "rel-path": "atl-schaefer2018/fslr32k"
+ }
+ },
+ "atl-mmpall": {
+ "fslr32k": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "6732741eb9a26212244d6bbb"
+ ],
+ "md5": "38371de50d8942dcf464d2b9c22edebf",
+ "uncompress": true,
+ "rel-path": "atl-mmpall/fslr32k"
+ }
+ },
+ "atl-voneconomo_koskinas": {
+ "url-type": "osf",
+ "url": [
+ "udpv8",
+ "6732744252f6f10b568ea607"
+ ],
+ "md5": "67085e2577d21dc3a742f4fcde6e3b18",
+ "uncompress": true,
+ "rel-path": "atl-voneconomo_koskinas"
+ },
+ "ds-hansen_manynetworks": {
+ "url-type": "github-release",
+ "url": [
+ "netneurolab",
+ "hansen_many_networks",
+ "v1.0.0"
+ ],
+ "md5": "9e503c759506293aa441054cfd206ccc",
+ "uncompress": true,
+ "rename-folder": "hansen_many_networks-1.0.0",
+ "rel-path": "ds-hansen_manynetworks"
+ },
+ "ds-hansen_receptors": {
+ "url-type": "github-archive",
+ "url": [
+ "netneurolab",
+ "hansen_receptors",
+ "f8b41da92a733f99368c1f39d4959731aa1316c1"
+ ],
+ "md5": "8d7472135ed1ecf18899ba6f680526c8",
+ "uncompress": true,
+ "rename-folder": "hansen_receptors-f8b41da92a733f99368c1f39d4959731aa1316c1",
+ "rel-path": "ds-hansen_receptors"
+ },
+ "ds-hansen_genescognition": {
+ "url-type": "github-archive",
+ "url": [
+ "netneurolab",
+ "hansen_genescognition",
+ "bdffc4b22c08bc69530b6b67baa7875ab1b82f77"
+ ],
+ "md5": "3f4108cb60234944142301df421e59ef",
+ "uncompress": true,
+ "rename-folder": "hansen_genescognition-bdffc4b22c08bc69530b6b67baa7875ab1b82f77",
+ "rel-path": "ds-hansen_genescognition"
+ },
+ "ds-hansen_brainstemfc": {
+ "url-type": "github-archive",
+ "url": [
+ "netneurolab",
+ "hansen_brainstemfc",
+ "522a60f2736f09b5c29cab9b9ffd174e4006e6af"
+ ],
+ "md5": "2b330934b5946d510f94040993b337d2",
+ "uncompress": true,
+ "rename-folder": "hansen_brainstemfc-522a60f2736f09b5c29cab9b9ffd174e4006e6af",
+ "rel-path": "ds-hansen_brainstemfc"
+ },
+ "ds-shafiei_megfmrimapping": {
+ "url-type": "github-archive",
+ "url": [
+ "netneurolab",
+ "shafiei_megfmrimapping",
+ "ba33fe8f3313f582d9422edf93d8f1f13309f8e1"
+ ],
+ "md5": "ddb83942e178e032b5787dba60025120",
+ "uncompress": true,
+ "rename-folder": "shafiei_megfmrimapping-ba33fe8f3313f582d9422edf93d8f1f13309f8e1",
+ "rel-path": "ds-shafiei_megfmrimapping"
+ },
+ "ds-shafiei_megdynamics": {
+ "url-type": "github-archive",
+ "url": [
+ "netneurolab",
+ "shafiei_megdynamics",
+ "9c8a2a25ba3da78c27539c821be3eba4b0ac84e0"
+ ],
+ "md5": "c8a3a3575e0a5e2b9deb11c4f596cf5c",
+ "uncompress": true,
+ "rename-folder": "shafiei_megdynamics-9c8a2a25ba3da78c27539c821be3eba4b0ac84e0",
+ "rel-path": "ds-shafiei_megdynamics"
+ },
+ "ds-suarez_mami": {
+ "url-type": "zenodo-file",
+ "url": [
+ "7143143",
+ "data.zip"
+ ],
+ "md5": "bf52dfd87b20fc565dd2bca626a7f65a",
+ "uncompress": true,
+ "rename-folder": "data",
+ "rel-path": "ds-suarez_mami"
+ }
+}
diff --git a/netneurotools/datasets/datasets_utils.py b/netneurotools/datasets/datasets_utils.py
new file mode 100644
index 0000000..8fd4ee6
--- /dev/null
+++ b/netneurotools/datasets/datasets_utils.py
@@ -0,0 +1,403 @@
+"""Utilites for loading / creating datasets."""
+
+import json
+import os
+import shutil
+from pathlib import Path
+from collections import namedtuple
+import importlib.resources
+
+
+try:
+ # nilearn 0.10.3
+ from nilearn.datasets._utils import fetch_single_file as _fetch_file
+except ImportError:
+ from nilearn.datasets.utils import _fetch_file
+
+
+SURFACE = namedtuple("Surface", ("L", "R"))
+
+FREESURFER_IGNORE = [
+ "unknown",
+ "corpuscallosum",
+ "Background+FreeSurfer_Defined_Medial_Wall",
+]
+
+
+def _get_data_dir(data_dir=None):
+ """
+ Get path to netneurotools data directory.
+
+ Parameters
+ ----------
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+
+ Returns
+ -------
+ data_dir : str
+ Path to use as data directory
+ """
+ if data_dir is None:
+ data_dir = os.environ.get("NNT_DATA", str(Path.home() / "nnt-data"))
+ data_dir = Path(data_dir).expanduser()
+ data_dir.mkdir(parents=True, exist_ok=True)
+ return data_dir
+
+
+def _decode_url(url_type, url):
+ """
+    Build a download URL for the given provider type.
+
+    Parameters
+    ----------
+    url_type : {'osf', 'github-release', 'github-archive', 'zenodo-file'}
+        Type of remote provider hosting the file
+    url : list of str
+        Components used to fill in the provider's URL template
+
+    Returns
+    -------
+    out_url : str
+        Formatted download URL
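+
+    Examples
+    --------
+    A hypothetical OSF entry (the project and file ids below are placeholders)::
+
+        >>> _decode_url("osf", ["abcde", "0123456789ab"])  # doctest: +SKIP
+        'https://files.osf.io/v1/resources/abcde/providers/osfstorage/0123456789ab'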
+ """
+ OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}"
+ GITHUB_RELEASE_API = "https://github.com/{}/{}/archive/refs/tags/{}.tar.gz"
+ GITHUB_ARCHIVE_API = "https://github.com/{}/{}/archive/{}.tar.gz"
+ ZENODO_FILE_API = "https://zenodo.org/record/{}/files/{}?download=1"
+
+ if url_type == "osf":
+ out_url = OSF_API.format(*url)
+ elif url_type == "github-release":
+ out_url = GITHUB_RELEASE_API.format(*url)
+ elif url_type == "github-archive":
+ out_url = GITHUB_ARCHIVE_API.format(*url)
+ elif url_type == "zenodo-file":
+ out_url = ZENODO_FILE_API.format(*url)
+ else:
+ raise ValueError("URL type {} not recognized".format(url_type))
+
+ return out_url
+
+
+def fetch_file(dataset_name, keys=None, force=False, data_dir=None, verbose=1):
+ """
+ Fetch file(s) for dataset `dataset_name`.
+
+ Parameters
+ ----------
+ dataset_name : str
+ Name of dataset to fetch
+ keys : str or list, optional
+ If provided, will only fetch the specified key(s) from the dataset.
+ Default: None
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+        Verbosity level. Default: 1
+
+    Returns
+    -------
+    targ : pathlib.Path
+        Path to the downloaded dataset folder (or file)
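+
+    Examples
+    --------
+    A minimal sketch (keys mirror the nesting in ``datasets.json``; data are
+    downloaded on first call)::
+
+        >>> fetch_file("tpl-fsaverage", keys="fsaverage5")  # doctest: +SKIP
+        PosixPath('.../nnt-data/tpl-fsaverage/fsaverage5')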
+ """
+ data_dir = _get_data_dir(data_dir=data_dir)
+ info = _get_dataset_info(dataset_name)
+
+ # deal with nested keys
+ if keys is not None:
+ if isinstance(keys, str):
+ keys = [keys]
+ for k in keys:
+ info = info[k]
+
+ if "uncompress" in info and info["uncompress"]:
+ targ_folder = data_dir / info["rel-path"]
+
+ # check if folder exists and remove if force=True
+ if targ_folder.exists():
+ if force:
+ shutil.rmtree(targ_folder)
+ if verbose:
+ print(f"Dataset {dataset_name} already exists. Overwriting.")
+ else:
+ if verbose:
+ print(f"Dataset {dataset_name} already exists. Skipping download.")
+ return targ_folder
+
+ # download compressed file
+ dl_fname = _fetch_file(
+ _decode_url(info["url-type"], info["url"]),
+ data_dir,
+ resume=True,
+ md5sum=info["md5"],
+ verbose=verbose,
+ )
+
+ # extract contents and remove compressed file
+ if info["url-type"] == "zenodo-file":
+ archive_format = "zip"
+ else:
+ archive_format = "gztar"
+
+ shutil.unpack_archive(dl_fname, targ_folder.parent, format=archive_format)
+ os.remove(dl_fname)
+
+ # rename folder if necessary
+ if "rename-folder" in info:
+ shutil.move(targ_folder.parent / info["rename-folder"], targ_folder)
+ if verbose:
+ print(f"Downloaded {dataset_name} to {data_dir}")
+ return targ_folder
+ else:
+ targ_file = data_dir / dataset_name / info["file-name"]
+
+ # check if file exists and remove if force=True
+ if targ_file.exists():
+ if force:
+ os.remove(targ_file)
+ if verbose:
+ print(f"Dataset {dataset_name} already exists. Overwriting.")
+ else:
+ if verbose:
+ print(f"Dataset {dataset_name} already exists. Skipping download.")
+ return targ_file
+ # download file
+ dl_fname = _fetch_file(
+ _decode_url(info["url-type"], info["url"]),
+ data_dir / dataset_name,
+ resume=True,
+ md5sum=info["md5"],
+ verbose=verbose,
+ )
+ # move/rename file
+ shutil.move(dl_fname, targ_file)
+ if verbose:
+ print(f"Downloaded {dataset_name} to {data_dir}")
+ return targ_file
+
+
+def _load_resource_json(relative_path):
+ """
+ Load JSON file from package resources.
+
+ Parameters
+ ----------
+ relative_path : str
+ Path to JSON file relative to package resources
+
+ Returns
+ -------
+ resource_json : dict
+ JSON file loaded as a dictionary
+ """
+ # handling pkg_resources.resource_filename deprecation
+ if getattr(importlib.resources, "files", None) is not None:
+ f_resource = importlib.resources.files("netneurotools") / relative_path
+ else:
+ from pkg_resources import resource_filename
+
+ f_resource = resource_filename("netneurotools", relative_path)
+
+ with open(f_resource) as src:
+ resource_json = json.load(src)
+
+ return resource_json
+
+
+NNT_DATASETS = _load_resource_json("datasets/datasets.json")
+# NNT_DATASETS = _decode_urls(NNT_DATASETS)
+
+
+def _get_dataset_info(name):
+ """
+    Return download information for dataset `name`.
+
+ Parameters
+ ----------
+ name : str
+ Name of dataset
+
+ Returns
+ -------
+    info : dict
+        Dataset information (download URL components, MD5 checksum, and related
+        metadata) for the requested dataset
+ """
+ try:
+ return NNT_DATASETS[name]
+ except KeyError:
+ raise KeyError(
+ f"Provided dataset {name} is not valid. "
+ f"Must be one of: {sorted(NNT_DATASETS.keys())}"
+ ) from None
+
+
+NNT_REFERENCES = _load_resource_json("datasets/references.json")
+
+
+def _get_reference_info(name, verbose=1, return_dict=False):
+ """
+ Return reference information for dataset `name`.
+
+ Parameters
+ ----------
+ name : str
+        Name of dataset
+    verbose : int, optional
+        Whether to print citation information. Default: 1
+    return_dict : bool, optional
+        Whether to also return the reference dictionary. Default: False
+
+ Returns
+ -------
+ reference : str
+ Reference information for dataset
+ """
+ try:
+ curr_refs = NNT_REFERENCES[name]
+ if verbose:
+ print("Please cite the following papers if you are using this function:")
+ for bib_category, bib_category_items in curr_refs.items():
+ print(f" [{bib_category}]:")
+ for bib_item in bib_category_items:
+ print(f" {bib_item['citation']}")
+
+ if return_dict:
+ return curr_refs
+ except KeyError:
+ raise KeyError(
+ f"Provided dataset {name} is not valid. "
+ f"Must be one of: {sorted(NNT_REFERENCES.keys())}"
+ ) from None
+
+
+def _fill_reference_json(bib_file, json_file, overwrite=False, use_defaults=False):
+ """
+ Fill in citation information for references in a JSON file.
+
+ For internal use only.
+
+ Parameters
+ ----------
+ bib_file : str
+ Path to BibTeX file containing references
+ json_file : str
+ Path to JSON file containing references
+ overwrite : bool, optional
+ Whether to overwrite existing citation information. Default: False
+ use_defaults : bool, optional
+ Whether to use default paths for `bib_file` and `json_file`. Default: False
+
+ Returns
+ -------
+ None
+ """
+ if use_defaults:
+ bib_file = (
+ importlib.resources.files("netneurotools") / "datasets/netneurotools.bib"
+ )
+ json_file = (
+ importlib.resources.files("netneurotools") / "datasets/references.json"
+ )
+
+ from pybtex import PybtexEngine
+
+ engine = PybtexEngine()
+
+ def _get_citation(key):
+ s = engine.format_from_file(
+ filename=bib_file,
+ style="unsrt",
+ citations=[key],
+ output_backend="plaintext",
+ )
+ return s.strip("\n").replace("[1] ", "")
+
+ with open(json_file) as src:
+ nnt_refs = json.load(src)
+
+ for _, value in nnt_refs.items():
+ for bib_category in value:
+ for bib_item in value[bib_category]:
+ if bib_item["bibkey"] not in ["", None]:
+ if bib_item["citation"] == "" or overwrite:
+ bib_item["citation"] = _get_citation(bib_item["bibkey"])
+
+ with open(json_file, "w") as dst:
+ json.dump(nnt_refs, dst, indent=4)
+
+
+def _check_freesurfer_subjid(subject_id, subjects_dir=None):
+ """
+ Check that `subject_id` exists in provided FreeSurfer `subjects_dir`.
+
+ Parameters
+ ----------
+ subject_id : str
+ FreeSurfer subject ID
+ subjects_dir : str, optional
+ Path to FreeSurfer subject directory. If not set, will inherit from
+ the environmental variable $SUBJECTS_DIR. Default: None
+
+ Returns
+ -------
+ subject_id : str
+ FreeSurfer subject ID, as provided
+ subjects_dir : str
+ Full filepath to `subjects_dir`
+
+ Raises
+ ------
+ FileNotFoundError
+ """
+ # check inputs for subjects_dir and subject_id
+ if subjects_dir is None or not os.path.isdir(subjects_dir):
+ try:
+ subjects_dir = Path(os.environ["SUBJECTS_DIR"])
+ except KeyError:
+ subjects_dir = Path.cwd()
+ else:
+ subjects_dir = Path(subjects_dir).resolve()
+
+ subjdir = subjects_dir / subject_id
+ if not subjdir.is_dir():
+ raise FileNotFoundError(
+ f"Cannot find specified subject id {subject_id} in "
+ f"provided subject directory {subjects_dir}."
+ )
+
+ return subject_id, subjects_dir
+
+
+def _get_freesurfer_subjid(subject_id, subjects_dir=None):
+ """
+ Get fsaverage version `subject_id`, fetching if required.
+
+ Parameters
+ ----------
+ subject_id : str
+ FreeSurfer subject ID
+ subjects_dir : str, optional
+ Path to FreeSurfer subject directory. If not set, will inherit from
+ the environmental variable $SUBJECTS_DIR. Default: None
+
+ Returns
+ -------
+ subject_id : str
+ FreeSurfer subject ID
+ subjects_dir : str
+ Path to subject directory with `subject_id`
+ """
+ # check for FreeSurfer install w/fsaverage; otherwise, fetch required
+ try:
+ subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir)
+ except FileNotFoundError:
+ if "fsaverage" not in subject_id:
+ raise ValueError(
+ f"Provided subject {subject_id} does not exist in provided "
+ f"subjects_dir {subjects_dir}"
+ ) from None
+ from .fetch_template import fetch_fsaverage
+
+ fetch_fsaverage(subject_id)
+ subjects_dir = os.path.join(_get_data_dir(), "tpl-fsaverage")
+ subject_id, subjects_dir = _check_freesurfer_subjid(subject_id, subjects_dir)
+
+ return subject_id, subjects_dir
diff --git a/netneurotools/datasets/fetch_atlas.py b/netneurotools/datasets/fetch_atlas.py
new file mode 100644
index 0000000..921c9a1
--- /dev/null
+++ b/netneurotools/datasets/fetch_atlas.py
@@ -0,0 +1,388 @@
+"""Functions for fetching atlas data."""
+
+import itertools
+
+
+from sklearn.utils import Bunch
+
+from .datasets_utils import SURFACE, _get_reference_info, fetch_file
+
+
+def fetch_cammoun2012(
+ version="MNI152NLin2009aSym", force=False, data_dir=None, verbose=1
+):
+ """
+ Download files for Cammoun et al., 2012 multiscale parcellation.
+
+    This dataset contains the Cammoun et al., 2012 multiscale parcellation at
+    five resolutions (scale033 through scale500).
+
+    If you use this data, please cite [1]_.
+
+ Parameters
+ ----------
+ version : str, optional
+ Specifies which version of the dataset to download, where
+ 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152
+ space, 'fsaverageX' will return .annot files defined in fsaverageX
+ space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in
+ fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs
+ probabilistic atlas files for generating new, subject-specific
+ parcellations. Default: 'MNI152NLin2009aSym'
+
+ Returns
+ -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys ['scale033', 'scale060', 'scale125',
+        'scale250', 'scale500'] (plus 'info' for the 'MNI152NLin2009aSym'
+        version), where corresponding values are filepaths, namedtuples, or
+        lists of filepaths to the downloaded parcellation files, depending on
+        the requested version.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT
+
+ References
+ ----------
+ .. [1] Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe
+ Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric
+ Hagmann. Mapping the human connectome at multiple scales with diffusion
+ spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397,
+ 2012.
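+
+    Examples
+    --------
+    A minimal usage sketch (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``; the first call downloads the files and
+    requires network access):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> cammoun = datasets.fetch_cammoun2012(version='fsaverage5')  # doctest: +SKIP
+    >>> sorted(cammoun.keys())  # doctest: +SKIP
+    ['scale033', 'scale060', 'scale125', 'scale250', 'scale500']
+    >>> cammoun['scale033'].lh  # doctest: +SKIP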
+ """
+ versions = [
+ "gcs",
+ "fsaverage",
+ "fsaverage5",
+ "fsaverage6",
+ "fslr32k",
+ "MNI152NLin2009aSym",
+ ]
+ if version not in versions:
+ raise ValueError(
+ f"The version of Cammoun et al., 2012 parcellation "
+ f"requested {version} does not exist. Must be one of {versions}"
+ )
+
+ dataset_name = "atl-cammoun2012"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ keys = ["scale033", "scale060", "scale125", "scale250", "scale500"]
+
+ fetched = fetch_file(
+ dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ if version == "MNI152NLin2009aSym":
+ _fname = "atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic.nii.gz"
+ data = {
+ k: fetched
+ / _fname.format(k[-3:])
+ for k in keys
+ }
+ data["info"] = fetched / "atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv"
+ elif version == "fslr32k":
+ _fname = "atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic.label.gii"
+ data = {
+ k: SURFACE(
+ fetched / _fname.format(k[-3:], "L"),
+ fetched / _fname.format(k[-3:], "R")
+ )
+ for k in keys
+ }
+ elif version in ("fsaverage", "fsaverage5", "fsaverage6"):
+ _fname = "atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic.annot"
+ data = {
+ k: SURFACE(
+ fetched / _fname.format(version, k[-3:], "L"),
+ fetched / _fname.format(version, k[-3:], "R")
+ )
+ for k in keys
+ }
+ else:
+ data = {
+ k: [
+ fetched / f"atl-Cammoun2012_res-{k[5:]}_hemi-L_probabilistic.gcs",
+ fetched / f"atl-Cammoun2012_res-{k[5:]}_hemi-R_probabilistic.gcs",
+ ]
+ for k in keys[:-1]
+ }
+ data[keys[-1]] = list(
+ itertools.chain.from_iterable(
+ [
+ [
+ fetched
+ / f"atl-Cammoun2012_res-{k[5:]}_hemi-L_probabilistic.gcs",
+ fetched
+ / f"atl-Cammoun2012_res-{k[5:]}_hemi-R_probabilistic.gcs",
+ ]
+ for k in ["scale500v1", "scale500v2", "scale500v3"]
+ ]
+ )
+ )
+
+ return Bunch(**data)
+
+
+def fetch_schaefer2018(version="fsaverage", force=False, data_dir=None, verbose=1):
+ """
+ Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_.
+
+ Parameters
+ ----------
+ version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'}
+        Specifies the surface space of the annotation files. Default:
+        'fsaverage'
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys of format '{}Parcels{}Networks',
+        where corresponding values are namedtuples of left/right hemisphere
+        .annot files (fsaverage versions) or single .dlabel.nii files
+        ('fslr32k')
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md
+
+ References
+ ----------
+ .. [1] Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann,
+ Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo.
+ Local-global parcellation of the human cerebral cortex from intrinsic
+ functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114,
+ 2018.
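+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``; downloads data on first call):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> schaefer = datasets.fetch_schaefer2018(version='fsaverage5')  # doctest: +SKIP
+    >>> schaefer['400Parcels7Networks'].lh  # doctest: +SKIP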
+ """
+ versions = ["fsaverage", "fsaverage5", "fsaverage6", "fslr32k"]
+ if version not in versions:
+ raise ValueError(
+ f"The version of Schaefer et al., 2018 parcellation "
+ f'requested "{version}" does not exist. Must be one of {versions}'
+ )
+
+ dataset_name = "atl-schaefer2018"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ keys = [f"{p}Parcels{n}Networks" for p in range(100, 1001, 100) for n in [7, 17]]
+
+ fetched = fetch_file(
+ dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ if version == "fslr32k":
+ _fname = "atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.dlabel.nii"
+ data = {
+ k: fetched / _fname.format(version, "LR", k)
+ for k in keys
+ }
+ else:
+ _fname = "atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.annot"
+ data = {
+ k: SURFACE(
+ fetched / _fname.format(version, "L", k),
+ fetched / _fname.format(version, "R", k)
+ )
+ for k in keys
+ }
+
+ return Bunch(**data)
+
+
+def fetch_mmpall(version="fslr32k", force=False, data_dir=None, verbose=1):
+ """
+ Download .label.gii files for Glasser et al., 2016 MMPAll atlas.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_.
+
+ Parameters
+ ----------
+ version : {'fslr32k'}
+        Specifies the surface space of the parcellation files. Default:
+        'fslr32k'
+
+ Returns
+ -------
+    filenames : namedtuple
+ Namedtuple with fields ('lh', 'rh') corresponding to filepaths to
+ left/right hemisphere parcellation files
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: https://www.humanconnectome.org/study/hcp-young-adult/document/wu-minn-hcp-consortium-open-access-data-use-terms
+
+ References
+ ----------
+ .. [1] Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker,
+ John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F
+ Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of
+ human cerebral cortex. Nature, 536(7615):171\u2013178, 2016.
+ """
+ versions = ["fslr32k"]
+ if version not in versions:
+ raise ValueError(
+ f"The version of Glasser et al., 2016 parcellation "
+ f'requested "{version}" does not exist. Must be one of {versions}'
+ )
+
+ dataset_name = "atl-mmpall"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(
+ dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ return SURFACE(
+ fetched / f"atl-MMPAll_space-{version}_hemi-L_deterministic.label.gii",
+ fetched / f"atl-MMPAll_space-{version}_hemi-R_deterministic.label.gii",
+ )
+
+
+def fetch_pauli2018(force=False, data_dir=None, verbose=1):
+ """
+ Download files for Pauli et al., 2018 subcortical parcellation.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with keys ['probabilistic', 'deterministic'],
+ where corresponding values are filepaths to downloaded atlas files.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: CC-BY Attribution 4.0 International
+
+ References
+ ----------
+ .. [1] Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A
+ high-resolution probabilistic in vivo atlas of human subcortical brain
+ nuclei. Scientific data, 5(1):1\u201313, 2018.
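+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> pauli = datasets.fetch_pauli2018()  # doctest: +SKIP
+    >>> pauli['deterministic']  # doctest: +SKIP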
+ """
+ dataset_name = "atl-pauli2018"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ data = {
+ "probabilistic": fetched
+ / "atl-pauli2018_space-MNI152NLin2009cAsym_hemi-both_probabilistic.nii.gz",
+ "deterministic": fetched
+ / "atl-pauli2018_space-MNI152NLin2009cAsym_hemi-both_deterministic.nii.gz",
+ "info": fetched / "atl-pauli2018_space-MNI152NLin2009cAsym_info.csv",
+ }
+
+ return Bunch(**data)
+
+
+def fetch_ye2020():
+ """Fetch Ye et al., 2020 subcortical parcellation."""
+ pass
+
+
+def fetch_voneconomo(force=False, data_dir=None, verbose=1):
+ """
+    Fetch von Economo-Koskinas probabilistic FreeSurfer atlas.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with keys ['gcs', 'ctab', 'info']
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: CC-BY-NC-SA 4.0
+
+ References
+ ----------
+ .. [1] Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben
+ Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas
+ atlas. NeuroImage, 170:249\u2013256, 2018.
+ """
+ dataset_name = "atl-voneconomo_koskinas"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ data = {
+ "gcs": SURFACE(
+ fetched / "atl-vonEconomoKoskinas_hemi-L_probabilistic.gcs",
+ fetched / "atl-vonEconomoKoskinas_hemi-R_probabilistic.gcs",
+ ),
+ "ctab": SURFACE(
+ fetched / "atl-vonEconomoKoskinas_hemi-L_probabilistic.ctab",
+ fetched / "atl-vonEconomoKoskinas_hemi-R_probabilistic.ctab",
+ ),
+ "info": fetched / "atl-vonEconomoKoskinas_info.csv",
+ }
+
+ return Bunch(**data)
diff --git a/netneurotools/datasets/fetch_project.py b/netneurotools/datasets/fetch_project.py
new file mode 100644
index 0000000..4462307
--- /dev/null
+++ b/netneurotools/datasets/fetch_project.py
@@ -0,0 +1,513 @@
+"""Functions for fetching project data."""
+
+import os
+import numpy as np
+
+from sklearn.utils import Bunch
+
+from .datasets_utils import _get_data_dir, _get_reference_info, fetch_file
+
+from ._mirchi2018 import _get_fc, _get_panas
+
+
+def fetch_vazquez_rodriguez2019(force=False, data_dir=None, verbose=1):
+ """
+ Download files from Vazquez-Rodriguez et al., 2019, PNAS.
+
+ This dataset contains one file: rsquared_gradient.csv, which contains
+ two columns: rsquared and gradient.
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ data : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D
+ Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van
+ Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic.
+ Gradients of structure\u2013function tethering across neocortex.
+ Proceedings of the National Academy of Sciences,
+ 116(42):21219\u201321227, 2019.
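+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> data = datasets.fetch_vazquez_rodriguez2019()  # doctest: +SKIP
+    >>> data['rsquared'].shape == data['gradient'].shape  # doctest: +SKIP
+    True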
+ """
+ dataset_name = "ds-vazquez_rodriguez2019"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ # load data
+ rsq, grad = np.loadtxt(
+ fetched / "rsquared_gradient.csv", delimiter=",", skiprows=1
+ ).T
+ data = {"rsquared": rsq, "gradient": grad}
+
+ return Bunch(**data)
+
+
+def fetch_mirchi2018(force=False, data_dir=None, verbose=1):
+ """
+    Download (and create) data for replicating Mirchi et al., 2018, SCAN.
+
+ Parameters
+ ----------
+ data_dir : str, optional
+        Directory to check for existing data files or in which to save
+        generated data files. Files are named myconnectome_fc.npy and
+        myconnectome_panas.csv for the functional connectivity and behavioral
+        data, respectively.
+
+ Returns
+ -------
+ X : (73, 198135) numpy.ndarray
+ Functional connections from MyConnectome rsfMRI time series data
+ Y : (73, 13) numpy.ndarray
+ PANAS subscales from MyConnectome behavioral data
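+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``; generating the data requires downloading the
+    MyConnectome time series on first call):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> X, Y = datasets.fetch_mirchi2018()  # doctest: +SKIP
+    >>> X.shape  # doctest: +SKIP
+    (73, 198135)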
+ """
+ data_dir = _get_data_dir(data_dir=data_dir) / "ds-mirchi2018"
+ data_dir.mkdir(exist_ok=True, parents=True)
+
+ X_fname = data_dir / "myconnectome_fc.npy"
+ Y_fname = data_dir / "myconnectome_panas.csv"
+
+ if not os.path.exists(X_fname):
+ X = _get_fc(verbose=verbose)
+ np.save(X_fname, X, allow_pickle=False)
+ else:
+ X = np.load(X_fname, allow_pickle=False)
+
+ if not os.path.exists(Y_fname):
+ Y = _get_panas()
+ np.savetxt(
+ Y_fname,
+ np.column_stack(list(Y.values())),
+ header=",".join(Y.keys()),
+ delimiter=",",
+ fmt="%i",
+ )
+ # convert dictionary to structured array before returning
+ Y = np.array(
+ [tuple(row) for row in np.column_stack(list(Y.values()))],
+ dtype=dict(names=list(Y.keys()), formats=["i8"] * len(Y)),
+ )
+ else:
+ Y = np.genfromtxt(Y_fname, delimiter=",", names=True, dtype=int)
+
+ return X, Y
+
+
+def fetch_hansen_manynetworks(force=False, data_dir=None, verbose=1):
+ """
+ Download files from Hansen et al., 2023, PLOS Biology.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-hansen_manynetworks"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ # _parc = {
+ # "cammoun033": "Cammoun033",
+ # "schaefer100": "Schaefer100",
+ # "schaefer400": "Schaefer400",
+ # }
+
+ # data = {
+ # parc: {
+ # "bigbrain": fetched
+ # / f"data/{_parc[parc]}/bigbrain_intensities.csv",
+ # "cognitive": fetched
+ # / f"data/{_parc[parc]}/cognitive_similarity.npy",
+ # "sc": fetched / f"data/{_parc[parc]}/consensusSC.npy",
+ # "sc_wei": fetched / f"data/{_parc[parc]}/consensusSC_wei.npy",
+ # "ephys": fetched
+ # / f"data/{_parc[parc]}/electrophysiological_connectivity.npy",
+ # "gene": fetched / f"data/{_parc[parc]}/gene_coexpression.npy",
+ # "megfc": fetched
+ # / f"data/{_parc[parc]}/groupFCmeg_aec_orth_{_parc[parc]}.npy.npz",
+ # "haemodynamic": fetched
+ # / f"data/{_parc[parc]}/haemodynamic_connectivity.npy",
+ # "laminar": fetched / f"data/{_parc[parc]}/laminar_similarity.npy",
+ # "metabolic": fetched
+ # / f"data/{_parc[parc]}/metabolic_connectivity.npy",
+ # "receptor": fetched
+ # / f"data/{_parc[parc]}/receptor_similarity.npy",
+ # "temporal": fetched
+ # / f"data/{_parc[parc]}/temporal_similarity.npy",
+ # "voneconomo": fetched
+ # / f"data/{_parc[parc]}/voneconomo_{_parc[parc]}.csv",
+ # }
+ # for parc in ["cammoun033", "schaefer100", "schaefer400"]
+ # }
+
+ return fetched
+
+
+def fetch_hansen_receptors(force=False, data_dir=None, verbose=1):
+ """
+ Download files from Hansen et al., 2022, Nature Neuroscience.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-hansen_receptors"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ return fetched
+
+
+def fetch_hansen_genescognition(force=False, data_dir=None, verbose=1):
+ """Download files from Hansen et al., 2021, Nature Human Behaviour.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-hansen_genescognition"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ return fetched
+
+
+def fetch_hansen_brainstemfc(force=False, data_dir=None, verbose=1):
+ """Download files from Hansen et al., 2024.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-hansen_brainstemfc"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ return fetched
+
+
+def fetch_shafiei_megfmrimapping(force=False, data_dir=None, verbose=1):
+ """Download files from Shafiei et al., 2022.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-shafiei_megfmrimapping"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ return fetched
+
+
+def fetch_shafiei_megdynamics(force=False, data_dir=None, verbose=1):
+ """Download files from Shafiei et al., 2023.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-shafiei_megdynamics"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ return fetched
+
+
+def fetch_suarez_mami(force=False, data_dir=None, verbose=1):
+ """Download files from Suarez et al., 2022, eLife.
+
+ This dataset contains
+
+ If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with fetched data.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1]
+ """
+ dataset_name = "ds-suarez_mami"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ return fetched
+
+
+def fetch_famous_gmat(dataset, force=False, data_dir=None, verbose=1):
+ """
+ Download files from multi-species connectomes.
+
+ This dataset contains
+
+ If you used this data, please cite celegans [1]_, drosophila [2]_, human
+ [3]_, macaque_markov [4]_, macaque_modha [5]_, mouse [6]_, rat [7]_.
+
+ Parameters
+ ----------
+ dataset : str
+ Specifies which dataset to download.
+
+ Returns
+ -------
+ data : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with, at a minimum, keys ['conn', 'labels',
+ 'ref'] providing connectivity / correlation matrix, region labels, and
+ relevant reference. Other possible keys include 'dist' (an array of
+ Euclidean distances between regions of 'conn'), 'coords' (an array of
+ xyz coordinates for regions of 'conn'), 'acronyms' (an array of
+ acronyms for regions of 'conn'), and 'networks' (an array of network
+ affiliations for regions of 'conn').
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri
+ B Chklovskii. Structural properties of the caenorhabditis elegans
+ neuronal network. PLoS computational biology, 7(2):e1001066, 2011.
+ .. [2] Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang,
+ Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau
+ Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of
+ brain-wide wiring networks in drosophila at single-cell resolution.
+ Current biology, 21(1):1\u201311, 2011.
+ .. [3] Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann.
+ Structural and functional connectome from 70 young healthy adults [data
+ set]. Zenodo, 2019.
+ .. [4] Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro
+ Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone,
+ Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range
+ connections on the specificity of the macaque interareal cortical
+ network. Proceedings of the National Academy of Sciences,
+ 110(13):5187\u20135192, 2013.
+ .. [5] Dharmendra S Modha and Raghavendra Singh. Network architecture of the
+ long-distance pathways in the macaque brain. Proceedings of the National
+ Academy of Sciences, 107(30):13485\u201313490, 2010.
+ .. [6] Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore.
+ Wiring cost and topological participation of the mouse brain connectome.
+ Proceedings of the National Academy of Sciences,
+ 112(32):10032\u201310037, 2015.
+ .. [7] Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the
+ cerebral cortical association connectome underlying cognition.
+ Proceedings of the National Academy of Sciences,
+ 112(16):E2093\u2013E2101, 2015.
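+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``; available keys depend on the files shipped
+    with each connectome):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> celegans = datasets.fetch_famous_gmat('celegans')  # doctest: +SKIP
+    >>> celegans['conn'].shape  # doctest: +SKIP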
+ """
+ available_connectomes = [
+ "celegans",
+ "drosophila",
+ "human_func_scale033",
+ "human_func_scale060",
+ "human_func_scale125",
+ "human_func_scale250",
+ "human_func_scale500",
+ "human_struct_scale033",
+ "human_struct_scale060",
+ "human_struct_scale125",
+ "human_struct_scale250",
+ "human_struct_scale500",
+ "macaque_markov",
+ "macaque_modha",
+ "mouse",
+ "rat",
+ ]
+
+ if dataset not in available_connectomes:
+ raise ValueError(
+ f"Provided dataset {dataset} not available; "
+ f"must be one of {available_connectomes}"
+ )
+
+ base_dataset_name = "ds-famous_gmat"
+ _get_reference_info(base_dataset_name, verbose=verbose)
+
+ fetched = fetch_file(
+ base_dataset_name, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ data = {}
+ for f in (fetched / dataset).glob("*.csv"):
+ try:
+ data[f.stem] = np.loadtxt(f, delimiter=",")
+ except ValueError:
+ data[f.stem] = np.loadtxt(f, delimiter=",", dtype=str)
+
+ return Bunch(**data)
+
+
+def fetch_neurosynth():
+ """Download Neurosynth data."""
+ pass
diff --git a/netneurotools/datasets/fetch_template.py b/netneurotools/datasets/fetch_template.py
new file mode 100644
index 0000000..9640ccc
--- /dev/null
+++ b/netneurotools/datasets/fetch_template.py
@@ -0,0 +1,697 @@
+"""Functions for fetching template data."""
+
+import json
+
+from sklearn.utils import Bunch
+
+from .datasets_utils import (
+ SURFACE,
+ _get_reference_info,
+ _check_freesurfer_subjid,
+ fetch_file,
+)
+
+
+def fetch_fsaverage(
+ version="fsaverage", use_local=False, force=False, data_dir=None, verbose=1
+):
+ """
+ Download files for fsaverage FreeSurfer template.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_, [2]_, [3]_.
+
+ Parameters
+ ----------
+ version : str, optional
+ One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5',
+ 'fsaverage6'}. Default: 'fsaverage'
+ use_local : bool, optional
+ If True, will attempt to use local FreeSurfer data. Default: False
+
+ Returns
+ -------
+    filenames : :class:`sklearn.utils.Bunch`
+        Dictionary-like object with keys ['orig', 'white', 'smoothwm', 'pial',
+        'inflated', 'sphere'], where corresponding values are length-2
+        namedtuples of downloaded template files (left and right hemispheres).
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical
+ surface-based analysis: i. segmentation and surface reconstruction.
+ Neuroimage, 9(2):179\u2013194, 1999.
+ .. [2] Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical
+ surface-based analysis: ii: inflation, flattening, and a surface-based
+ coordinate system. Neuroimage, 9(2):195\u2013207, 1999.
+ .. [3] Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale.
+ High-resolution intersubject averaging and a coordinate system for the
+ cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.
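+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``; set ``use_local=True`` to reuse an existing
+    FreeSurfer installation instead of downloading):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> fsaverage = datasets.fetch_fsaverage(version='fsaverage5')  # doctest: +SKIP
+    >>> fsaverage['pial'].lh  # doctest: +SKIP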
+ """
+ versions = ["fsaverage", "fsaverage3", "fsaverage4", "fsaverage5", "fsaverage6"]
+ if version not in versions:
+ raise ValueError(
+ f"The version of fsaverage requested {version} does not "
+ f"exist. Must be one of {versions}"
+ )
+
+ dataset_name = "tpl-fsaverage"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ keys = ["orig", "white", "smoothwm", "pial", "inflated", "sphere"]
+
+ if use_local:
+ try:
+ data_dir = _check_freesurfer_subjid(version)[1]
+ data = {
+ k: SURFACE(
+ data_dir / f"{version}/surf/lh.{k}",
+ data_dir / f"{version}/surf/rh.{k}",
+ )
+ for k in keys
+ }
+ except FileNotFoundError:
+ raise FileNotFoundError(
+ f"Local FreeSurfer data for {version} not found. "
+ "Please ensure FreeSurfer is installed and properly set up."
+ ) from None
+ else:
+ fetched = fetch_file(
+ dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ data = {
+ k: SURFACE(
+ fetched / f"surf/lh.{k}",
+ fetched / f"surf/rh.{k}",
+ )
+ for k in keys
+ }
+
+ return Bunch(**data)
+
+
+def fetch_fsaverage_curated(version="fsaverage", force=False, data_dir=None, verbose=1):
+ """
+ Download files for fsaverage FreeSurfer template.
+
+ Curated by neuromaps.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_, [2]_, [3]_, [4]_.
+
+ Parameters
+ ----------
+ version : str, optional
+ One of {'fsaverage', 'fsaverage4', 'fsaverage5',
+ 'fsaverage6'}. Default: 'fsaverage'
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with template files.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical
+ surface-based analysis: i. segmentation and surface reconstruction.
+ Neuroimage, 9(2):179\u2013194, 1999.
+ .. [2] Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical
+ surface-based analysis: ii: inflation, flattening, and a surface-based
+ coordinate system. Neuroimage, 9(2):195\u2013207, 1999.
+ .. [3] Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale.
+ High-resolution intersubject averaging and a coordinate system for the
+ cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.
+ .. [4] Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet,
+ Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz,
+ Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps:
+ structural and functional interpretation of brain maps. Nature Methods,
+ 19(11):1472\u20131479, 2022.
+ """
+ versions = ["fsaverage", "fsaverage6", "fsaverage5", "fsaverage4"]
+ if version not in versions:
+ raise ValueError(
+ f"The version of fsaverage requested {version} does not "
+ f"exist. Must be one of {versions}"
+ )
+
+ dataset_name = "tpl-fsaverage_curated"
+ _get_reference_info("tpl-fsaverage_curated", verbose=verbose)
+
+ keys = ["white", "pial", "inflated", "sphere", "medial", "sulc", "vaavg"]
+ keys_suffix = {
+ "white": "white.surf",
+ "pial": "pial.surf",
+ "inflated": "inflated.surf",
+ "sphere": "sphere.surf",
+ "medial": "desc-nomedialwall_dparc.label",
+ "sulc": "desc-sulc_midthickness.shape",
+ "vaavg": "desc-vaavg_midthickness.shape",
+ }
+ version_density = {
+ "fsaverage": "164k",
+ "fsaverage6": "41k",
+ "fsaverage5": "10k",
+ "fsaverage4": "3k",
+ }
+ density = version_density[version]
+
+ fetched = fetch_file(
+ dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ # deal with default neuromaps directory structure in the archive
+ if not fetched.exists():
+ import shutil
+
+ shutil.move(fetched.parent / "atlases/fsaverage", fetched)
+ shutil.rmtree(fetched.parent / "atlases")
+
+ data = {
+ k: SURFACE(
+ fetched / f"tpl-fsaverage_den-{density}_hemi-L_{keys_suffix[k]}.gii",
+ fetched / f"tpl-fsaverage_den-{density}_hemi-R_{keys_suffix[k]}.gii",
+ )
+ for k in keys
+ }
+
+ return Bunch(**data)
+
+
+def fetch_hcp_standards(force=False, data_dir=None, verbose=1):
+ """
+ Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP.
+
+ This dataset contains
+
+    The original file was from [3]_, but is no longer available. The archived
+    file is available from [4]_.
+
+    If you used this data, please cite [1]_, [2]_.
+
+ Returns
+ -------
+ standards : str
+ Filepath to standard_mesh_atlases directory
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna
+ Barch,Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen,
+       Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen,
+ project: a data acquisition perspective. Neuroimage,
+ 62(4):2222\u20132231, 2012.
+ .. [2] Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson,
+ Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad
+ Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal
+ preprocessing pipelines for the human connectome project. Neuroimage,
+ 80:105\u2013124, 2013.
+ .. [3] http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip
+ .. [4] https://web.archive.org/web/20220121035833/http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip
+ """
+ dataset_name = "tpl-hcp_standards"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ fetched = fetch_file(
+ dataset_name,
+ keys="standard_mesh_atlases",
+ force=force,
+ data_dir=data_dir,
+ verbose=verbose,
+ )
+
+ return fetched
+
+
+def fetch_fslr_curated(version="fslr32k", force=False, data_dir=None, verbose=1):
+ """
+ Download files for HCP fsLR template.
+
+ Curated by neuromaps.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_, [2]_, [3]_.
+
+ Parameters
+ ----------
+ version : str, optional
+ One of {"fslr4k", "fslr8k", "fslr32k", "fslr164k"}. Default: 'fslr32k'
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with template files.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna
+       Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen,
+ Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome
+ project: a data acquisition perspective. Neuroimage,
+ 62(4):2222\u20132231, 2012.
+ .. [2] Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson,
+ Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad
+ Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal
+ preprocessing pipelines for the human connectome project. Neuroimage,
+ 80:105\u2013124, 2013.
+ .. [3] Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet,
+ Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz,
+ Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps:
+ structural and functional interpretation of brain maps. Nature Methods,
+ 19(11):1472\u20131479, 2022.
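+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> fslr = datasets.fetch_fslr_curated(version='fslr32k')  # doctest: +SKIP
+    >>> fslr['midthickness'].rh  # doctest: +SKIP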
+ """
+ versions = ["fslr4k", "fslr8k", "fslr32k", "fslr164k"]
+ if version not in versions:
+ raise ValueError(
+            f"The version of fsLR requested {version} does not "
+ f"exist. Must be one of {versions}"
+ )
+
+ dataset_name = "tpl-fslr_curated"
+ _get_reference_info("tpl-fslr_curated", verbose=verbose)
+
+ keys = [
+ "midthickness",
+ "inflated",
+ "veryinflated",
+ "sphere",
+ "medial",
+ "sulc",
+ "vaavg",
+ ]
+ if version in ["fslr4k", "fslr8k"]:
+ keys.remove("veryinflated")
+ keys_suffix = {
+ "midthickness": "midthickness.surf",
+ "inflated": "inflated.surf",
+ "veryinflated": "veryinflated.surf",
+ "sphere": "sphere.surf",
+ "medial": "desc-nomedialwall_dparc.label",
+ "sulc": "desc-sulc_midthickness.shape",
+ "vaavg": "desc-vaavg_midthickness.shape",
+ }
+ version_density = {
+ "fslr4k": "4k",
+ "fslr8k": "8k",
+ "fslr32k": "32k",
+ "fslr164k": "164k",
+ }
+ density = version_density[version]
+
+ fetched = fetch_file(
+ dataset_name, keys=version, force=force, data_dir=data_dir, verbose=verbose
+ )
+
+ # deal with default neuromaps directory structure in the archive
+ if not fetched.exists():
+ import shutil
+
+ shutil.move(fetched.parent / "atlases/fsLR", fetched)
+ shutil.rmtree(fetched.parent / "atlases")
+
+ data = {
+ k: SURFACE(
+ fetched / f"tpl-fsLR_den-{density}_hemi-L_{keys_suffix[k]}.gii",
+ fetched / f"tpl-fsLR_den-{density}_hemi-R_{keys_suffix[k]}.gii",
+ )
+ for k in keys
+ }
+
+ return Bunch(**data)
+
+
+def fetch_civet(density="41k", version="v1", force=False, data_dir=None, verbose=1):
+ """
+ Fetch CIVET surface files.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_, [2]_, [3]_.
+
+ Parameters
+ ----------
+ density : {'41k', '164k'}, optional
+ Which density of the CIVET-space geometry files to fetch. The
+        high-resolution '164k' surface only exists for version 'v2'.
+        Default: '41k'
+    version : {'v1', 'v2'}, optional
+        Which version of the CIVET surfaces to use. Default: 'v1'
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with keys ['mid', 'white'] containing geometry
+ files for CIVET surface. Note for version 'v1' the 'mid' and 'white'
+ files are identical.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE
+
+ References
+ ----------
+ .. [1] Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An
+ unbiased iterative group registration template for cortical surface
+ analysis. Neuroimage, 34(4):1535\u20131544, 2007.
+ .. [2] Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli,
+ and DL Collins. Unbiased nonlinear average age-appropriate brain
+ templates from birth to adulthood. NeuroImage, 47:S102, 2009.
+ .. [3] Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K
+ Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet
+ image-processing environment: a fully automated comprehensive pipeline
+ for anatomical neuroimaging research. proceedings of the 12th annual
+ meeting of the organization for human brain mapping. Florence, Italy,
+ pages 2266, 2006.
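+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> civet = datasets.fetch_civet(density='41k', version='v2')  # doctest: +SKIP
+    >>> civet['mid'].lh  # doctest: +SKIP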
+ """
+ densities = ["41k", "164k"]
+ if density not in densities:
+ raise ValueError(
+ f'The density of CIVET requested "{density}" does not exist. '
+ f"Must be one of {densities}"
+ )
+ versions = ["v1", "v2"]
+ if version not in versions:
+ raise ValueError(
+ f'The version of CIVET requested "{version}" does not exist. '
+ f"Must be one of {versions}"
+ )
+
+ if version == "v1" and density == "164k":
+ raise ValueError(
+            'The "164k" density CIVET surface only exists for version "v2"'
+ )
+
+ dataset_name = "tpl-civet"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ keys = ["mid", "white"]
+
+ fetched = fetch_file(
+ dataset_name,
+ keys=[version, "civet" + density],
+ force=force,
+ data_dir=data_dir,
+ verbose=verbose,
+ )
+
+ data = {
+ k: SURFACE(
+ fetched / f"tpl-civet_space-ICBM152_hemi-L_den-{density}_{k}.obj",
+ fetched / f"tpl-civet_space-ICBM152_hemi-R_den-{density}_{k}.obj",
+ )
+ for k in keys
+ }
+ return Bunch(**data)
+
+
+def fetch_civet_curated(version="civet41k", force=False, data_dir=None, verbose=1):
+ """
+ Download files for CIVET template.
+
+ Curated by neuromaps.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_, [2]_, [3]_, [4]_.
+
+ Parameters
+ ----------
+ version : {'civet41k', 'civet164k'}, optional
+ Which density of the CIVET-space geometry files to fetch.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with template files.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ Notes
+ -----
+ License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE
+
+ References
+ ----------
+ .. [1] Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An
+ unbiased iterative group registration template for cortical surface
+ analysis. Neuroimage, 34(4):1535\u20131544, 2007.
+ .. [2] Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli,
+ and DL Collins. Unbiased nonlinear average age-appropriate brain
+ templates from birth to adulthood. NeuroImage, 47:S102, 2009.
+ .. [3] Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K
+ Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet
+ image-processing environment: a fully automated comprehensive pipeline
+ for anatomical neuroimaging research. proceedings of the 12th annual
+ meeting of the organization for human brain mapping. Florence, Italy,
+ pages 2266, 2006.
+ .. [4] Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet,
+ Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz,
+ Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps:
+ structural and functional interpretation of brain maps. Nature Methods,
+ 19(11):1472\u20131479, 2022.
+ """
+ versions = ["civet41k", "civet164k"]
+ if version not in versions:
+ raise ValueError(
+            f"The version of CIVET requested {version} does not "
+ f"exist. Must be one of {versions}"
+ )
+
+ dataset_name = "tpl-civet_curated"
+ _get_reference_info("tpl-civet_curated", verbose=verbose)
+
+ keys = [
+ "white",
+ "midthickness",
+ "inflated",
+ "veryinflated",
+ "sphere",
+ "medial",
+ "sulc",
+ "vaavg",
+ ]
+ keys_suffix = {
+ "white": "white.surf",
+ "midthickness": "midthickness.surf",
+ "inflated": "inflated.surf",
+ "veryinflated": "veryinflated.surf",
+ "sphere": "sphere.surf",
+ "medial": "desc-nomedialwall_dparc.label",
+ "sulc": "desc-sulc_midthickness.shape",
+ "vaavg": "desc-vaavg_midthickness.shape",
+ }
+ version_density = {
+ "civet41k": "41k",
+ "civet164k": "164k",
+ }
+ density = version_density[version]
+
+ fetched = fetch_file(
+ dataset_name,
+ keys=["v2", version],
+ force=force,
+ data_dir=data_dir,
+ verbose=verbose,
+ )
+
+ # deal with default neuromaps directory structure in the archive
+ if not fetched.exists():
+ import shutil
+
+ shutil.move(fetched.parent / "atlases/civet", fetched)
+ shutil.rmtree(fetched.parent / "atlases")
+
+ data = {
+ k: SURFACE(
+ fetched / f"tpl-civet_den-{density}_hemi-L_{keys_suffix[k]}.gii",
+ fetched / f"tpl-civet_den-{density}_hemi-R_{keys_suffix[k]}.gii",
+ )
+ for k in keys
+ }
+
+ return Bunch(**data)
+
+
+def fetch_conte69(force=False, data_dir=None, verbose=1):
+ """
+ Download files for Van Essen et al., 2012 Conte69 template.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_, [2]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with keys ['midthickness', 'inflated',
+ 'vinflated'], where corresponding values are lists of filepaths to
+ downloaded template files.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch,
+ Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio
+ Corbetta, Sandra W Curtiss, and others. The human connectome project: a
+ data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.
+ .. [2] David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell,
+ and Timothy Coalson. Parcellations and hemispheric asymmetries of human
+ cerebral cortex analyzed on surface-based atlases. Cerebral cortex,
+ 22(10):2241\u20132262, 2012.
+ .. [3] http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas
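+
+    Examples
+    --------
+    Illustrative usage (assumes the fetcher is re-exported from
+    ``netneurotools.datasets``):
+
+    >>> from netneurotools import datasets  # doctest: +SKIP
+    >>> conte69 = datasets.fetch_conte69()  # doctest: +SKIP
+    >>> conte69['midthickness'].lh  # doctest: +SKIP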
+ """
+ dataset_name = "tpl-conte69"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ keys = ["midthickness", "inflated", "vinflated"]
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ data = {
+ k: SURFACE(
+ fetched / f"tpl-conte69_space-MNI305_variant-fsLR32k_{k}.L.surf.gii",
+ fetched / f"tpl-conte69_space-MNI305_variant-fsLR32k_{k}.R.surf.gii",
+ )
+ for k in keys
+ }
+ data["info"] = json.load(open(fetched / "template_description.json", "r"))
+
+ return Bunch(**data)
+
+
+def fetch_yerkes19(force=False, data_dir=None, verbose=1):
+ """
+ Download files for Donahue et al., 2016 Yerkes19 template.
+
+ This dataset contains
+
+    If you used this data, please cite [1]_.
+
+ Returns
+ -------
+ filenames : :class:`sklearn.utils.Bunch`
+ Dictionary-like object with keys ['midthickness', 'inflated',
+ 'vinflated'], where corresponding values are lists of filepaths to
+ downloaded template files.
+
+ Other Parameters
+ ----------------
+ force : bool, optional
+ If True, will overwrite existing dataset. Default: False
+ data_dir : str, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 1
+
+ References
+ ----------
+ .. [1] Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises
+ Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson,
+ Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. Using
+ diffusion tractography to predict cortical connection strength and
+ distance: a quantitative comparison with tracers in the monkey. Journal
+ of Neuroscience, 36(25):6758\u20136770, 2016.
+ .. [2] https://balsa.wustl.edu/reference/show/976nz
+ """
+ dataset_name = "tpl-yerkes19"
+ _get_reference_info(dataset_name, verbose=verbose)
+
+ keys = ["midthickness", "inflated", "vinflated"]
+
+ fetched = fetch_file(dataset_name, force=force, data_dir=data_dir, verbose=verbose)
+
+ data = {
+ k: SURFACE(
+ fetched / f"tpl-yerkes19_space-fsLR32k_{k}.L.surf.gii",
+ fetched / f"tpl-yerkes19_space-fsLR32k_{k}.R.surf.gii",
+ )
+ for k in keys
+ }
+
+ return Bunch(**data)
diff --git a/netneurotools/datasets/fetchers.py b/netneurotools/datasets/fetchers.py
deleted file mode 100644
index b2fa95d..0000000
--- a/netneurotools/datasets/fetchers.py
+++ /dev/null
@@ -1,882 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Functions for fetching datasets from the internet."""
-
-from collections import namedtuple
-import itertools
-import json
-import os.path as op
-import warnings
-
-try:
- # nilearn 0.10.3
- from nilearn.datasets._utils import fetch_files as _fetch_files
-except ImportError:
- from nilearn.datasets.utils import _fetch_files
-
-import numpy as np
-from sklearn.utils import Bunch
-
-from .utils import _get_data_dir, _get_dataset_info
-from ..utils import check_fs_subjid
-
-SURFACE = namedtuple('Surface', ('lh', 'rh'))
-
-
-def fetch_cammoun2012(version='MNI152NLin2009aSym', data_dir=None, url=None,
- resume=True, verbose=1):
- """
- Download files for Cammoun et al., 2012 multiscale parcellation.
-
- Parameters
- ----------
- version : str, optional
- Specifies which version of the dataset to download, where
- 'MNI152NLin2009aSym' will return .nii.gz atlas files defined in MNI152
- space, 'fsaverageX' will return .annot files defined in fsaverageX
- space (FreeSurfer 6.0.1), 'fslr32k' will return .label.gii files in
- fs_LR_32k HCP space, and 'gcs' will return FreeSurfer-style .gcs
- probabilistic atlas files for generating new, subject-specific
- parcellations. Default: 'MNI152NLin2009aSym'
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['scale033', 'scale060', 'scale125',
- 'scale250', 'scale500'], where corresponding values are lists of
- filepaths to downloaded parcellation files.
-
- References
- ----------
- Cammoun, L., Gigandet, X., Meskaldji, D., Thiran, J. P., Sporns, O., Do, K.
- Q., Maeder, P., and Meuli, R., & Hagmann, P. (2012). Mapping the human
- connectome at multiple scales with diffusion spectrum MRI. Journal of
- Neuroscience Methods, 203(2), 386-397.
-
- Notes
- -----
- License: https://raw.githubusercontent.com/LTS5/cmp/master/COPYRIGHT
- """
- if version == 'surface':
- warnings.warn('Providing `version="surface"` is deprecated and will '
- 'be removed in a future release. For consistent '
- 'behavior please use `version="fsaverage"` instead.',
- DeprecationWarning, stacklevel=2)
- version = 'fsaverage'
- elif version == 'volume':
- warnings.warn('Providing `version="volume"` is deprecated and will '
- 'be removed in a future release. For consistent '
- 'behavior please use `version="MNI152NLin2009aSym"` '
- 'instead.',
- DeprecationWarning, stacklevel=2)
- version = 'MNI152NLin2009aSym'
-
- versions = [
- 'gcs', 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k',
- 'MNI152NLin2009aSym'
- ]
- if version not in versions:
- raise ValueError('The version of Cammoun et al., 2012 parcellation '
- 'requested "{}" does not exist. Must be one of {}'
- .format(version, versions))
-
- dataset_name = 'atl-cammoun2012'
- keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)[version]
- if url is None:
- url = info['url']
-
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- # filenames differ based on selected version of dataset
- if version == 'MNI152NLin2009aSym':
- filenames = [
- 'atl-Cammoun2012_space-MNI152NLin2009aSym_res-{}_deterministic{}'
- .format(res[-3:], suff) for res in keys for suff in ['.nii.gz']
- ] + ['atl-Cammoun2012_space-MNI152NLin2009aSym_info.csv']
- elif version == 'fslr32k':
- filenames = [
- 'atl-Cammoun2012_space-fslr32k_res-{}_hemi-{}_deterministic{}'
- .format(res[-3:], hemi, suff) for res in keys
- for hemi in ['L', 'R'] for suff in ['.label.gii']
- ]
- elif version in ('fsaverage', 'fsaverage5', 'fsaverage6'):
- filenames = [
- 'atl-Cammoun2012_space-{}_res-{}_hemi-{}_deterministic{}'
- .format(version, res[-3:], hemi, suff) for res in keys
- for hemi in ['L', 'R'] for suff in ['.annot']
- ]
- else:
- filenames = [
- 'atl-Cammoun2012_res-{}_hemi-{}_probabilistic{}'
- .format(res[5:], hemi, suff)
- for res in keys[:-1] + ['scale500v1', 'scale500v2', 'scale500v3']
- for hemi in ['L', 'R'] for suff in ['.gcs', '.ctab']
- ]
-
- files = [
- (op.join(dataset_name, version, f), url, opts) for f in filenames
- ]
- data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
-
- if version == 'MNI152NLin2009aSym':
- keys += ['info']
- elif version in ('fslr32k', 'fsaverage', 'fsaverage5', 'fsaverage6'):
- data = [SURFACE(*data[i:i + 2]) for i in range(0, len(data), 2)]
- else:
- data = [data[::2][i:i + 2] for i in range(0, len(data) // 2, 2)]
- # deal with the fact that last scale is split into three files :sigh:
- data = data[:-3] + [list(itertools.chain.from_iterable(data[-3:]))]
-
- return Bunch(**dict(zip(keys, data)))
-
-
-def fetch_conte69(data_dir=None, url=None, resume=True, verbose=1):
- """
- Download files for Van Essen et al., 2012 Conte69 template.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['midthickness', 'inflated',
- 'vinflated'], where corresponding values are lists of filepaths to
- downloaded template files.
-
- References
- ----------
- http://brainvis.wustl.edu/wiki/index.php//Caret:Atlases/Conte69_Atlas
-
- Van Essen, D. C., Glasser, M. F., Dierker, D. L., Harwell, J., & Coalson,
- T. (2011). Parcellations and hemispheric asymmetries of human cerebral
- cortex analyzed on surface-based atlases. Cerebral cortex, 22(10),
- 2241-2262.
-
- Notes
- -----
- License: ???
- """
- dataset_name = 'tpl-conte69'
- keys = ['midthickness', 'inflated', 'vinflated']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)
- if url is None:
- url = info['url']
-
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- filenames = [
- 'tpl-conte69/tpl-conte69_space-MNI305_variant-fsLR32k_{}.{}.surf.gii'
- .format(res, hemi) for res in keys for hemi in ['L', 'R']
- ] + ['tpl-conte69/template_description.json']
-
- data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames],
- resume=resume, verbose=verbose)
-
- with open(data[-1], 'r') as src:
- data[-1] = json.load(src)
-
- # bundle hemispheres together
- data = [SURFACE(*data[:-1][i:i + 2]) for i in range(0, 6, 2)] + [data[-1]]
-
- return Bunch(**dict(zip(keys + ['info'], data)))
-
-
-def fetch_yerkes19(data_dir=None, url=None, resume=None, verbose=1):
- """
- Download files for Donahue et al., 2016 Yerkes19 template.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['midthickness', 'inflated',
- 'vinflated'], where corresponding values are lists of filepaths to
- downloaded template files.
-
- References
- ----------
- https://balsa.wustl.edu/reference/show/976nz
-
- Donahue, C. J., Sotiropoulos, S. N., Jbabdi, S., Hernandez-Fernandez, M.,
- Behrens, T. E., Dyrby, T. B., ... & Glasser, M. F. (2016). Using diffusion
- tractography to predict cortical connection strength and distance: a
- quantitative comparison with tracers in the monkey. Journal of
- Neuroscience, 36(25), 6758-6770.
-
- Notes
- -----
- License: ???
- """
- dataset_name = 'tpl-yerkes19'
- keys = ['midthickness', 'inflated', 'vinflated']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)
- if url is None:
- url = info['url']
-
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- filenames = [
- 'tpl-yerkes19/tpl-yerkes19_space-fsLR32k_{}.{}.surf.gii'
- .format(res, hemi) for res in keys for hemi in ['L', 'R']
- ]
-
- data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames],
- resume=resume, verbose=verbose)
-
- # bundle hemispheres together
- data = [SURFACE(*data[i:i + 2]) for i in range(0, 6, 2)]
-
- return Bunch(**dict(zip(keys + ['info'], data)))
-
-
-def fetch_pauli2018(data_dir=None, url=None, resume=True, verbose=1):
- """
- Download files for Pauli et al., 2018 subcortical parcellation.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['probabilistic', 'deterministic'],
- where corresponding values are filepaths to downloaded atlas files.
-
- References
- ----------
- Pauli, W. M., Nili, A. N., & Tyszka, J. M. (2018). A high-resolution
- probabilistic in vivo atlas of human subcortical brain nuclei. Scientific
- Data, 5, 180063.
-
- Notes
- -----
- License: CC-BY Attribution 4.0 International
- """
- dataset_name = 'atl-pauli2018'
- keys = ['probabilistic', 'deterministic', 'info']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)
-
- # format the query how _fetch_files() wants things and then download data
- files = [
- (i['name'], i['url'], dict(md5sum=i['md5'], move=i['name']))
- for i in info
- ]
-
- data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
-
- return Bunch(**dict(zip(keys, data)))
-
-
-def fetch_fsaverage(version='fsaverage', data_dir=None, url=None, resume=True,
- verbose=1):
- """
- Download files for fsaverage FreeSurfer template.
-
- Parameters
- ----------
- version : str, optional
- One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5',
- 'fsaverage6'}. Default: 'fsaverage'
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['surf'] where corresponding values
- are length-2 lists downloaded template files (each list composed of
- files for the left and right hemisphere).
- """
- versions = [
- 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'
- ]
- if version not in versions:
- raise ValueError('The version of fsaverage requested "{}" does not '
- 'exist. Must be one of {}'.format(version, versions))
-
- dataset_name = 'tpl-fsaverage'
- keys = ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)[version]
- if url is None:
- url = info['url']
-
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- filenames = [
- op.join(version, 'surf', '{}.{}'.format(hemi, surf))
- for surf in keys for hemi in ['lh', 'rh']
- ]
-
- try:
- data_dir = check_fs_subjid(version)[1]
- data = [op.join(data_dir, f) for f in filenames]
- except FileNotFoundError:
- data = _fetch_files(data_dir, resume=resume, verbose=verbose,
- files=[(op.join(dataset_name, f), url, opts)
- for f in filenames])
-
- data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)]
-
- return Bunch(**dict(zip(keys, data)))
-
-
-def available_connectomes():
- """
- List datasets available via :func:`~.fetch_connectome`.
-
- Returns
- -------
- datasets : list of str
- List of available datasets
- """
- return sorted(_get_dataset_info('ds-connectomes').keys())
-
-
-def fetch_connectome(dataset, data_dir=None, url=None, resume=True,
- verbose=1):
- """
- Download files from multi-species connectomes.
-
- Parameters
- ----------
- dataset : str
- Specifies which dataset to download; must be one of the datasets listed
- in :func:`netneurotools.datasets.available_connectomes()`.
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- data : :class:`sklearn.utils.Bunch`
- Dictionary-like object with, at a minimum, keys ['conn', 'labels',
- 'ref'] providing connectivity / correlation matrix, region labels, and
- relevant reference. Other possible keys include 'dist' (an array of
- Euclidean distances between regions of 'conn'), 'coords' (an array of
- xyz coordinates for regions of 'conn'), 'acronyms' (an array of
- acronyms for regions of 'conn'), and 'networks' (an array of network
- affiliations for regions of 'conn')
-
- References
- ----------
- See `ref` key of returned dictionary object for relevant dataset reference
- """
- if dataset not in available_connectomes():
- raise ValueError('Provided dataset {} not available; must be one of {}'
- .format(dataset, available_connectomes()))
-
- dataset_name = 'ds-connectomes'
-
- data_dir = op.join(_get_data_dir(data_dir=data_dir), dataset_name)
- info = _get_dataset_info(dataset_name)[dataset]
- if url is None:
- url = info['url']
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset)
- }
-
- filenames = [
- op.join(dataset, '{}.csv'.format(fn)) for fn in info['keys']
- ] + [op.join(dataset, 'ref.txt')]
- data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames],
- resume=resume, verbose=verbose)
-
- # load data
- for n, arr in enumerate(data[:-1]):
- try:
- data[n] = np.loadtxt(arr, delimiter=',')
- except ValueError:
- data[n] = np.loadtxt(arr, delimiter=',', dtype=str)
- with open(data[-1]) as src:
- data[-1] = src.read().strip()
-
- return Bunch(**dict(zip(info['keys'] + ['ref'], data)))
-
-
-def fetch_vazquez_rodriguez2019(data_dir=None, url=None, resume=True,
- verbose=1):
- """
- Download files from Vazquez-Rodriguez et al., 2019, PNAS.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- data : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['rsquared', 'gradient'] containing
- 1000 parcel-wise values from Vazquez-Rodriguez et al., 2019, PNAS
-
- References
- ----------
- See `ref` key of returned dictionary object for relevant dataset reference
- """
- dataset_name = 'ds-vazquez_rodriguez2019'
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)
- if url is None:
- url = info['url']
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- filenames = [
- op.join(dataset_name, 'rsquared_gradient.csv')
- ]
- data = _fetch_files(data_dir, files=[(f, url, opts) for f in filenames],
- resume=resume, verbose=verbose)
-
- # load data
- rsq, grad = np.loadtxt(data[0], delimiter=',', skiprows=1).T
-
- return Bunch(rsquared=rsq, gradient=grad)
-
-
-def fetch_schaefer2018(version='fsaverage', data_dir=None, url=None,
- resume=True, verbose=1):
- """
- Download FreeSurfer .annot files for Schaefer et al., 2018 parcellation.
-
- Parameters
- ----------
- version : {'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'}
- Specifies which surface annotation files should be matched to. Default:
- 'fsaverage'
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys of format '{}Parcels{}Networks' where
- corresponding values are the left/right hemisphere annotation files
-
- References
- ----------
- Schaefer, A., Kong, R., Gordon, E. M., Laumann, T. O., Zuo, X. N., Holmes,
- A. J., ... & Yeo, B. T. (2018). Local-global parcellation of the human
- cerebral cortex from intrinsic functional connectivity MRI. Cerebral
- Cortex, 28(9), 3095-3114.
-
- Notes
- -----
- License: https://github.com/ThomasYeoLab/CBIG/blob/master/LICENSE.md
- """
- versions = ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k']
- if version not in versions:
- raise ValueError('The version of Schaefer et al., 2018 parcellation '
- 'requested "{}" does not exist. Must be one of {}'
- .format(version, versions))
-
- dataset_name = 'atl-schaefer2018'
- keys = [
- '{}Parcels{}Networks'.format(p, n)
- for p in range(100, 1001, 100) for n in [7, 17]
- ]
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)[version]
- if url is None:
- url = info['url']
-
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- if version == 'fslr32k':
- hemispheres, suffix = ['LR'], 'dlabel.nii'
- else:
- hemispheres, suffix = ['L', 'R'], 'annot'
- filenames = [
- 'atl-Schaefer2018_space-{}_hemi-{}_desc-{}_deterministic.{}'
- .format(version, hemi, desc, suffix)
- for desc in keys for hemi in hemispheres
- ]
-
- files = [(op.join(dataset_name, version, f), url, opts)
- for f in filenames]
- data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
-
- if suffix == 'annot':
- data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)]
-
- return Bunch(**dict(zip(keys, data)))
-
-
-def fetch_hcp_standards(data_dir=None, url=None, resume=True, verbose=1):
- """
- Fetch HCP standard mesh atlases for converting between FreeSurfer and HCP.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- standards : str
- Filepath to standard_mesh_atlases directory
- """
- if url is None:
- url = 'https://web.archive.org/web/20220121035833/' + \
- 'http://brainvis.wustl.edu/workbench/standard_mesh_atlases.zip'
- dataset_name = 'standard_mesh_atlases'
- data_dir = _get_data_dir(data_dir=data_dir)
- opts = {
- 'uncompress': True,
- 'move': '{}.zip'.format(dataset_name)
- }
- filenames = [
- 'L.sphere.32k_fs_LR.surf.gii', 'R.sphere.32k_fs_LR.surf.gii'
- ]
- files = [(op.join(dataset_name, f), url, opts) for f in filenames]
- _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
-
- return op.join(data_dir, dataset_name)
-
-
-def fetch_mmpall(version='fslr32k', data_dir=None, url=None, resume=True,
- verbose=1):
- """
- Download .label.gii files for Glasser et al., 2016 MMPAll atlas.
-
- Parameters
- ----------
- version : {'fslr32k'}
- Specifies which surface annotation files should be matched to. Default:
- 'fslr32k'
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Namedtuple with fields ('lh', 'rh') corresponding to filepaths to
- left/right hemisphere parcellation files
-
- References
- ----------
- Glasser, M. F., Coalson, T. S., Robinson, E. C., Hacker, C. D., Harwell,
- J., Yacoub, E., ... & Van Essen, D. C. (2016). A multi-modal parcellation
- of human cerebral cortex. Nature, 536(7615), 171-178.
-
- Notes
- -----
- License: https://www.humanconnectome.org/study/hcp-young-adult/document/
- wu-minn-hcp-consortium-open-access-data-use-terms
- """
- versions = ['fslr32k']
- if version not in versions:
- raise ValueError('The version of Glasser et al., 2016 parcellation '
- 'requested "{}" does not exist. Must be one of {}'
- .format(version, versions))
-
- dataset_name = 'atl-mmpall'
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)[version]
- if url is None:
- url = info['url']
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
-
- hemispheres = ['L', 'R']
- filenames = [
- 'atl-MMPAll_space-{}_hemi-{}_deterministic.label.gii'
- .format(version, hemi) for hemi in hemispheres
- ]
-
- files = [(op.join(dataset_name, version, f), url, opts) for f in filenames]
- data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
-
- return SURFACE(*data)
-
-
-def fetch_voneconomo(data_dir=None, url=None, resume=True, verbose=1):
- """
- Fetch von-Economo Koskinas probabilistic FreeSurfer atlas.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['gcs', 'ctab', 'info']
-
- References
- ----------
- Scholtens, L. H., de Reus, M. A., de Lange, S. C., Schmidt, R., & van den
- Heuvel, M. P. (2018). An MRI von Economo–Koskinas atlas. NeuroImage, 170,
- 249-256.
-
- Notes
- -----
- License: CC-BY-NC-SA 4.0
- """
- dataset_name = 'atl-voneconomo_koskinas'
- keys = ['gcs', 'ctab', 'info']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)
- if url is None:
- url = info['url']
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
- filenames = [
- 'atl-vonEconomoKoskinas_hemi-{}_probabilistic.{}'.format(hemi, suff)
- for hemi in ['L', 'R'] for suff in ['gcs', 'ctab']
- ] + ['atl-vonEconomoKoskinas_info.csv']
- files = [(op.join(dataset_name, f), url, opts) for f in filenames]
- data = _fetch_files(data_dir, files=files, resume=resume, verbose=verbose)
- data = [SURFACE(*data[:-1:2])] + [SURFACE(*data[1:-1:2])] + [data[-1]]
-
- return Bunch(**dict(zip(keys, data)))
-
-
-def fetch_civet(density='41k', version='v1', data_dir=None, url=None,
- resume=True, verbose=1):
- """
- Fetch CIVET surface files.
-
- Parameters
- ----------
- density : {'41k', '164k'}, optional
- Which density of the CIVET-space geometry files to fetch. The
- high-resolution '164k' surface only exists for version 'v2'
- version : {'v1', 'v2'}, optional
- Which version of the CIVET surfaces to use. Default: 'v1'
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
- url : str, optional
- URL from which to download data. Default: None
- resume : bool, optional
- Whether to attempt to resume partial download, if possible. Default:
- True
- verbose : int, optional
- Modifies verbosity of download, where higher numbers mean more updates.
- Default: 1
-
- Returns
- -------
- filenames : :class:`sklearn.utils.Bunch`
- Dictionary-like object with keys ['mid', 'white'] containing geometry
- files for CIVET surface. Note for version 'v1' the 'mid' and 'white'
- files are identical.
-
- References
- ----------
- Y. Ad-Dab’bagh, O. Lyttelton, J.-S. Muehlboeck, C. Lepage, D. Einarson, K.
- Mok, O. Ivanov, R. Vincent, J. Lerch, E. Fombonne, A. C. Evans, The CIVET
- image-processing environment: A fully automated comprehensive pipeline for
- anatomical neuroimaging research. Proceedings of the 12th Annual Meeting of
- the Organization for Human Brain Mapping (2006).
-
- Notes
- -----
- License: https://github.com/aces/CIVET_Full_Project/blob/master/LICENSE
- """
- densities = ['41k', '164k']
- if density not in densities:
- raise ValueError('The density of CIVET requested "{}" does not exist. '
- 'Must be one of {}'.format(density, densities))
- versions = ['v1', 'v2']
- if version not in versions:
- raise ValueError('The version of CIVET requested "{}" does not exist. '
- 'Must be one of {}'.format(version, versions))
-
- if version == 'v1' and density == '164k':
- raise ValueError('The "164k" density CIVET surface only exists for '
- 'version "v2"')
-
- dataset_name = 'tpl-civet'
- keys = ['mid', 'white']
-
- data_dir = _get_data_dir(data_dir=data_dir)
- info = _get_dataset_info(dataset_name)[version]['civet{}'.format(density)]
- if url is None:
- url = info['url']
-
- opts = {
- 'uncompress': True,
- 'md5sum': info['md5'],
- 'move': '{}.tar.gz'.format(dataset_name)
- }
- filenames = [
- op.join(dataset_name, version, 'civet{}'.format(density),
- 'tpl-civet_space-ICBM152_hemi-{}_den-{}_{}.obj'
- .format(hemi, density, surf))
- for surf in keys for hemi in ['L', 'R']
- ]
-
- data = _fetch_files(data_dir, resume=resume, verbose=verbose,
- files=[(f, url, opts) for f in filenames])
-
- data = [SURFACE(*data[i:i + 2]) for i in range(0, len(keys) * 2, 2)]
-
- return Bunch(**dict(zip(keys, data)))
diff --git a/netneurotools/datasets/generators.py b/netneurotools/datasets/generators.py
deleted file mode 100644
index 42c4f56..0000000
--- a/netneurotools/datasets/generators.py
+++ /dev/null
@@ -1,93 +0,0 @@
-
-# -*- coding: utf-8 -*-
-"""Functions for making "random" datasets."""
-
-import numpy as np
-from sklearn.utils.validation import check_random_state
-
-
-def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001):
- """
- Generate random vectors that are correlated to approximately `corr`.
-
- Parameters
- ----------
- corr : [-1, 1] float or (N, N) numpy.ndarray, optional
- The approximate correlation desired. If a float is provided, two
- vectors with the specified level of correlation will be generated. If
- an array is provided, it is assumed to be a symmetrical correlation
- matrix and ``len(corr)`` vectors with the specified levels of
- correlation will be generated. Default: 0.85
- size : int or tuple, optional
- Desired size of the generated vectors. Default: 10000
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Default: None
- tol : [0, 1] float, optional
- Tolerance of correlation between generated `vectors` and specified
- `corr`. Default: 0.001
-
- Returns
- -------
- vectors : numpy.ndarray
- Random vectors of size `size` with correlation specified by `corr`
-
- Examples
- --------
- >>> from netneurotools import datasets
-
- By default two vectors are generated with specified correlation
-
- >>> x, y = datasets.make_correlated_xy()
- >>> np.corrcoef(x, y) # doctest: +SKIP
- array([[1. , 0.85083661],
- [0.85083661, 1. ]])
- >>> x, y = datasets.make_correlated_xy(corr=0.2)
- >>> np.corrcoef(x, y) # doctest: +SKIP
- array([[1. , 0.20069953],
- [0.20069953, 1. ]])
-
- You can also provide correlation matrices to generate more than two vectors
- if desired. Note that this makes it more difficult to ensure the actual
- correlations are close to the desired values:
-
- >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]]
- >>> out = datasets.make_correlated_xy(corr=corr)
- >>> out.shape
- (3, 10000)
- >>> np.corrcoef(out) # doctest: +SKIP
- array([[1. , 0.50965273, 0.30235686],
- [0.50965273, 1. , 0.01089107],
- [0.30235686, 0.01089107, 1. ]])
- """
- rs = check_random_state(seed)
-
- # no correlations outside [-1, 1] bounds
- if np.any(np.abs(corr) > 1):
- raise ValueError('Provided `corr` must (all) be in range [-1, 1].')
-
- # if we're given a single number, assume two vectors are desired
- if isinstance(corr, (int, float)):
- covs = np.ones((2, 2)) * 0.111
- covs[(0, 1), (1, 0)] *= corr
- # if we're given a correlation matrix, assume `N` vectors are desired
- elif isinstance(corr, (list, np.ndarray)):
- corr = np.asarray(corr)
- if corr.ndim != 2 or len(corr) != len(corr.T):
- raise ValueError('If `corr` is a list or array, must be a 2D '
- 'square array, not {}'.format(corr.shape))
- if np.any(np.diag(corr) != 1):
- raise ValueError('Diagonal of `corr` must be 1.')
- covs = corr * 0.111
- means = [0] * len(covs)
-
- # generate the variables
- count = 0
- while count < 500:
- vectors = rs.multivariate_normal(mean=means, cov=covs, size=size).T
- flat = vectors.reshape(len(vectors), -1)
- # if diff between actual and desired correlations less than tol, break
- if np.all(np.abs(np.corrcoef(flat) - (covs / 0.111)) < tol):
- break
- count += 1
-
- return vectors
diff --git a/netneurotools/datasets/netneurotools.bib b/netneurotools/datasets/netneurotools.bib
new file mode 100644
index 0000000..2e725c4
--- /dev/null
+++ b/netneurotools/datasets/netneurotools.bib
@@ -0,0 +1,264 @@
+@article{cammoun2012mapping,
+ title={Mapping the human connectome at multiple scales with diffusion spectrum MRI},
+ author={Cammoun, Leila and Gigandet, Xavier and Meskaldji, Djalel and Thiran, Jean Philippe and Sporns, Olaf and Do, Kim Q and Maeder, Philippe and Meuli, Reto and Hagmann, Patric},
+ journal={Journal of neuroscience methods},
+ volume={203},
+ number={2},
+ pages={386--397},
+ year={2012},
+ publisher={Elsevier}
+}
+
+@article{pauli2018high,
+ title={A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei},
+ author={Pauli, Wolfgang M and Nili, Amanda N and Tyszka, J Michael},
+ journal={Scientific data},
+ volume={5},
+ number={1},
+ pages={1--13},
+ year={2018},
+ publisher={Nature Publishing Group}
+}
+
+@article{van2012human,
+ title={The Human Connectome Project: a data acquisition perspective},
+ author={Van Essen, David C and Ugurbil, Kamil and Auerbach, Edward and Barch, Deanna and Behrens, Timothy EJ and Bucholz, Richard and Chang, Acer and Chen, Liyong and Corbetta, Maurizio and Curtiss, Sandra W and others},
+ journal={Neuroimage},
+ volume={62},
+ number={4},
+ pages={2222--2231},
+ year={2012},
+ publisher={Elsevier}
+}
+
+@article{van2012parcellations,
+ title={Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases},
+ author={Van Essen, David C and Glasser, Matthew F and Dierker, Donna L and Harwell, John and Coalson, Timothy},
+ journal={Cerebral cortex},
+ volume={22},
+ number={10},
+ pages={2241--2262},
+ year={2012},
+ publisher={Oxford University Press}
+}
+
+@article{glasser2013minimal,
+ title={The minimal preprocessing pipelines for the Human Connectome Project},
+ author={Glasser, Matthew F and Sotiropoulos, Stamatios N and Wilson, J Anthony and Coalson, Timothy S and Fischl, Bruce and Andersson, Jesper L and Xu, Junqian and Jbabdi, Saad and Webster, Matthew and Polimeni, Jonathan R and others},
+ journal={Neuroimage},
+ volume={80},
+ pages={105--124},
+ year={2013},
+ publisher={Elsevier}
+}
+
+@article{donahue2016using,
+ title={Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison with tracers in the monkey},
+ author={Donahue, Chad J and Sotiropoulos, Stamatios N and Jbabdi, Saad and Hernandez-Fernandez, Moises and Behrens, Timothy E and Dyrby, Tim B and Coalson, Timothy and Kennedy, Henry and Knoblauch, Kenneth and Van Essen, David C and others},
+ journal={Journal of Neuroscience},
+ volume={36},
+ number={25},
+ pages={6758--6770},
+ year={2016},
+ publisher={Soc Neuroscience}
+}
+
+@article{dale1999cortical,
+ title={Cortical surface-based analysis: I. Segmentation and surface reconstruction},
+ author={Dale, Anders M and Fischl, Bruce and Sereno, Martin I},
+ journal={Neuroimage},
+ volume={9},
+ number={2},
+ pages={179--194},
+ year={1999},
+ publisher={Elsevier}
+}
+
+@article{fischl1999cortical,
+ title={Cortical surface-based analysis: II: inflation, flattening, and a surface-based coordinate system},
+ author={Fischl, Bruce and Sereno, Martin I and Dale, Anders M},
+ journal={Neuroimage},
+ volume={9},
+ number={2},
+ pages={195--207},
+ year={1999},
+ publisher={Elsevier}
+}
+
+@article{fischl1999high,
+ title={High-resolution intersubject averaging and a coordinate system for the cortical surface},
+ author={Fischl, Bruce and Sereno, Martin I and Tootell, Roger BH and Dale, Anders M},
+ journal={Human brain mapping},
+ volume={8},
+ number={4},
+ pages={272--284},
+ year={1999},
+ publisher={Wiley Online Library}
+}
+
+@article{lyttelton2007unbiased,
+ title={An unbiased iterative group registration template for cortical surface analysis},
+ author={Lyttelton, Oliver and Boucher, Maxime and Robbins, Steven and Evans, Alan},
+ journal={Neuroimage},
+ volume={34},
+ number={4},
+ pages={1535--1544},
+ year={2007},
+ publisher={Elsevier}
+}
+
+@article{fonov2009unbiased,
+ title={Unbiased nonlinear average age-appropriate brain templates from birth to adulthood},
+ author={Fonov, Vladimir S and Evans, Alan C and McKinstry, Robert C and Almli, C Robert and Collins, DL},
+ journal={NeuroImage},
+ volume={47},
+ pages={S102},
+ year={2009},
+ publisher={Elsevier}
+}
+
+@article{ad2006civet,
+ title={The CIVET image-processing environment: A fully automated comprehensive pipeline for anatomical neuroimaging research. Proceedings of the 12th annual meeting of the organization for human brain mapping},
+ author={Ad-Dab'bagh, Y and Lyttelton, O and Muehlboeck, J and Lepage, C and Einarson, D and Mok, K and Ivanov, O and Vincent, R and Lerch, J and Fombonne, E},
+ journal={Florence, Italy},
+ pages={2266},
+ year={2006}
+}
+
+@article{varshney2011structural,
+ title={Structural properties of the Caenorhabditis elegans neuronal network},
+ author={Varshney, Lav R and Chen, Beth L and Paniagua, Eric and Hall, David H and Chklovskii, Dmitri B},
+ journal={PLoS computational biology},
+ volume={7},
+ number={2},
+ pages={e1001066},
+ year={2011},
+ publisher={Public Library of Science San Francisco, USA}
+}
+
+@article{chiang2011three,
+ title={Three-dimensional reconstruction of brain-wide wiring networks in Drosophila at single-cell resolution},
+ author={Chiang, Ann-Shyn and Lin, Chih-Yung and Chuang, Chao-Chun and Chang, Hsiu-Ming and Hsieh, Chang-Huain and Yeh, Chang-Wei and Shih, Chi-Tin and Wu, Jian-Jheng and Wang, Guo-Tzau and Chen, Yung-Chang and others},
+ journal={Current biology},
+ volume={21},
+ number={1},
+ pages={1--11},
+ year={2011},
+ publisher={Elsevier}
+}
+
+@article{griffa2019lausanne,
+ title={Structural and functional connectome from 70 young healthy adults [data set]},
+ author={Griffa, Alessandra and Alem{\'a}n-G{\'o}mez, Yasser and Hagmann, Patric},
+ journal={Zenodo},
+ year={2019}
+}
+
+@article{markov2013role,
+ title={The role of long-range connections on the specificity of the macaque interareal cortical network},
+ author={Markov, Nikola T and Ercsey-Ravasz, Maria and Lamy, Camille and Ribeiro Gomes, Ana Rita and Magrou, Lo{\"\i}c and Misery, Pierre and Giroud, Pascale and Barone, Pascal and Dehay, Colette and Toroczkai, Zolt{\'a}n and others},
+ journal={Proceedings of the National Academy of Sciences},
+ volume={110},
+ number={13},
+ pages={5187--5192},
+ year={2013},
+ publisher={National Acad Sciences}
+}
+
+@article{modha2010network,
+ title={Network architecture of the long-distance pathways in the macaque brain},
+ author={Modha, Dharmendra S and Singh, Raghavendra},
+ journal={Proceedings of the National Academy of Sciences},
+ volume={107},
+ number={30},
+ pages={13485--13490},
+ year={2010},
+ publisher={National Acad Sciences}
+}
+
+@article{rubinov2015wiring,
+ title={Wiring cost and topological participation of the mouse brain connectome},
+ author={Rubinov, Mikail and Ypma, Rolf JF and Watson, Charles and Bullmore, Edward T},
+ journal={Proceedings of the National Academy of Sciences},
+ volume={112},
+ number={32},
+ pages={10032--10037},
+ year={2015},
+ publisher={National Acad Sciences}
+}
+
+@article{bota2015architecture,
+ title={Architecture of the cerebral cortical association connectome underlying cognition},
+ author={Bota, Mihail and Sporns, Olaf and Swanson, Larry W},
+ journal={Proceedings of the National Academy of Sciences},
+ volume={112},
+ number={16},
+ pages={E2093--E2101},
+ year={2015},
+ publisher={National Acad Sciences}
+}
+
+@article{vazquez2019gradients,
+ title={Gradients of structure--function tethering across neocortex},
+ author={V{\'a}zquez-Rodr{\'\i}guez, Bertha and Su{\'a}rez, Laura E and Markello, Ross D and Shafiei, Golia and Paquola, Casey and Hagmann, Patric and Van Den Heuvel, Martijn P and Bernhardt, Boris C and Spreng, R Nathan and Misic, Bratislav},
+ journal={Proceedings of the National Academy of Sciences},
+ volume={116},
+ number={42},
+ pages={21219--21227},
+ year={2019},
+ publisher={National Acad Sciences}
+}
+
+@article{hansen2023integrating,
+ title={Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease},
+ author={Hansen, Justine Y and Shafiei, Golia and Voigt, Katharina and Liang, Emma X and Cox, Sylvia ML and Leyton, Marco and Jamadar, Sharna D and Misic, Bratislav},
+ journal={PLoS biology},
+ volume={21},
+ number={9},
+ pages={e3002314},
+ year={2023},
+ publisher={Public Library of Science San Francisco, CA USA}
+}
+
+@article{schaefer2018local,
+ title={Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity MRI},
+ author={Schaefer, Alexander and Kong, Ru and Gordon, Evan M and Laumann, Timothy O and Zuo, Xi-Nian and Holmes, Avram J and Eickhoff, Simon B and Yeo, BT Thomas},
+ journal={Cerebral cortex},
+ volume={28},
+ number={9},
+ pages={3095--3114},
+ year={2018},
+ publisher={Oxford University Press}
+}
+
+@article{glasser2016multi,
+ title={A multi-modal parcellation of human cerebral cortex},
+ author={Glasser, Matthew F and Coalson, Timothy S and Robinson, Emma C and Hacker, Carl D and Harwell, John and Yacoub, Essa and Ugurbil, Kamil and Andersson, Jesper and Beckmann, Christian F and Jenkinson, Mark and others},
+ journal={Nature},
+ volume={536},
+ number={7615},
+ pages={171--178},
+ year={2016},
+ publisher={Nature Publishing Group}
+}
+
+@article{scholtens2018mri,
+ title={An {MRI} von {Economo}--{Koskinas} atlas},
+ author={Scholtens, Lianne H and de Reus, Marcel A and de Lange, Siemon C and Schmidt, Ruben and van den Heuvel, Martijn P},
+ journal={NeuroImage},
+ volume={170},
+ pages={249--256},
+ year={2018},
+ publisher={Elsevier}
+}
+
+@article{markello2022neuromaps,
+ title={Neuromaps: structural and functional interpretation of brain maps},
+ author={Markello, Ross D and Hansen, Justine Y and Liu, Zhen-Qi and Bazinet, Vincent and Shafiei, Golia and Su{\'a}rez, Laura E and Blostein, Nadia and Seidlitz, Jakob and Baillet, Sylvain and Satterthwaite, Theodore D and others},
+ journal={Nature Methods},
+ volume={19},
+ number={11},
+ pages={1472--1479},
+ year={2022},
+ publisher={Nature Publishing Group US New York}
+}
diff --git a/netneurotools/datasets/references.json b/netneurotools/datasets/references.json
new file mode 100644
index 0000000..fe51a99
--- /dev/null
+++ b/netneurotools/datasets/references.json
@@ -0,0 +1,324 @@
+{
+ "atl-cammoun2012": {
+ "primary": [
+ {
+ "citation": "Leila Cammoun, Xavier Gigandet, Djalel Meskaldji, Jean Philippe Thiran, Olaf Sporns, Kim Q Do, Philippe Maeder, Reto Meuli, and Patric Hagmann. Mapping the human connectome at multiple scales with diffusion spectrum mri. Journal of neuroscience methods, 203(2):386\u2013397, 2012.",
+ "bibkey": "cammoun2012mapping"
+ }
+ ]
+ },
+ "atl-pauli2018": {
+ "primary": [
+ {
+ "citation": "Wolfgang M Pauli, Amanda N Nili, and J Michael Tyszka. A high-resolution probabilistic in vivo atlas of human subcortical brain nuclei. Scientific data, 5(1):1\u201313, 2018.",
+ "bibkey": "pauli2018high"
+ }
+ ]
+ },
+ "tpl-conte69": {
+ "primary": [
+ {
+ "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.",
+ "bibkey": "van2012human"
+ },
+ {
+ "citation": "David C Van Essen, Matthew F Glasser, Donna L Dierker, John Harwell, and Timothy Coalson. Parcellations and hemispheric asymmetries of human cerebral cortex analyzed on surface-based atlases. Cerebral cortex, 22(10):2241\u20132262, 2012.",
+ "bibkey": "van2012parcellations"
+ }
+ ]
+ },
+ "tpl-yerkes19": {
+ "primary": [
+ {
+ "citation": "Chad J Donahue, Stamatios N Sotiropoulos, Saad Jbabdi, Moises Hernandez-Fernandez, Timothy E Behrens, Tim B Dyrby, Timothy Coalson, Henry Kennedy, Kenneth Knoblauch, David C Van Essen, and others. Using diffusion tractography to predict cortical connection strength and distance: a quantitative comparison with tracers in the monkey. Journal of Neuroscience, 36(25):6758\u20136770, 2016.",
+ "bibkey": "donahue2016using"
+ }
+ ]
+ },
+ "tpl-fsaverage": {
+ "primary": [
+ {
+ "citation": "Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical surface-based analysis: i. segmentation and surface reconstruction. Neuroimage, 9(2):179\u2013194, 1999.",
+ "bibkey": "dale1999cortical"
+ },
+ {
+ "citation": "Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical surface-based analysis: ii: inflation, flattening, and a surface-based coordinate system. Neuroimage, 9(2):195\u2013207, 1999.",
+ "bibkey": "fischl1999cortical"
+ },
+ {
+ "citation": "Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. High-resolution intersubject averaging and a coordinate system for the cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.",
+ "bibkey": "fischl1999high"
+ }
+ ]
+ },
+ "tpl-fsaverage_curated": {
+ "primary": [
+ {
+ "citation": "Anders M Dale, Bruce Fischl, and Martin I Sereno. Cortical surface-based analysis: i. segmentation and surface reconstruction. Neuroimage, 9(2):179\u2013194, 1999.",
+ "bibkey": "dale1999cortical"
+ },
+ {
+ "citation": "Bruce Fischl, Martin I Sereno, and Anders M Dale. Cortical surface-based analysis: ii: inflation, flattening, and a surface-based coordinate system. Neuroimage, 9(2):195\u2013207, 1999.",
+ "bibkey": "fischl1999cortical"
+ },
+ {
+ "citation": "Bruce Fischl, Martin I Sereno, Roger BH Tootell, and Anders M Dale. High-resolution intersubject averaging and a coordinate system for the cortical surface. Human brain mapping, 8(4):272\u2013284, 1999.",
+ "bibkey": "fischl1999high"
+ },
+ {
+ "citation": "Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: structural and functional interpretation of brain maps. Nature Methods, 19(11):1472\u20131479, 2022.",
+ "bibkey": "markello2022neuromaps"
+ }
+ ]
+ },
+ "tpl-hcp_standards": {
+ "primary": [
+ {
+ "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.",
+ "bibkey": "van2012human"
+ },
+ {
+ "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.",
+ "bibkey": "glasser2013minimal"
+ }
+ ]
+ },
+ "tpl-fslr_curated": {
+ "primary": [
+ {
+ "citation": "David C Van Essen, Kamil Ugurbil, Edward Auerbach, Deanna Barch, Timothy EJ Behrens, Richard Bucholz, Acer Chang, Liyong Chen, Maurizio Corbetta, Sandra W Curtiss, and others. The human connectome project: a data acquisition perspective. Neuroimage, 62(4):2222\u20132231, 2012.",
+ "bibkey": "van2012human"
+ },
+ {
+ "citation": "Matthew F Glasser, Stamatios N Sotiropoulos, J Anthony Wilson, Timothy S Coalson, Bruce Fischl, Jesper L Andersson, Junqian Xu, Saad Jbabdi, Matthew Webster, Jonathan R Polimeni, and others. The minimal preprocessing pipelines for the human connectome project. Neuroimage, 80:105\u2013124, 2013.",
+ "bibkey": "glasser2013minimal"
+ },
+ {
+ "citation": "Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: structural and functional interpretation of brain maps. Nature Methods, 19(11):1472\u20131479, 2022.",
+ "bibkey": "markello2022neuromaps"
+ }
+ ]
+ },
+ "tpl-civet": {
+ "primary": [
+ {
+ "citation": "Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An unbiased iterative group registration template for cortical surface analysis. Neuroimage, 34(4):1535\u20131544, 2007.",
+ "bibkey": "lyttelton2007unbiased"
+ },
+ {
+ "citation": "Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, and DL Collins. Unbiased nonlinear average age-appropriate brain templates from birth to adulthood. NeuroImage, 47:S102, 2009.",
+ "bibkey": "fonov2009unbiased"
+ },
+ {
+ "citation": "Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet image-processing environment: a fully automated comprehensive pipeline for anatomical neuroimaging research. proceedings of the 12th annual meeting of the organization for human brain mapping. Florence, Italy, pages 2266, 2006.",
+ "bibkey": "ad2006civet"
+ }
+ ]
+ },
+ "tpl-civet_curated": {
+ "primary": [
+ {
+ "citation": "Oliver Lyttelton, Maxime Boucher, Steven Robbins, and Alan Evans. An unbiased iterative group registration template for cortical surface analysis. Neuroimage, 34(4):1535\u20131544, 2007.",
+ "bibkey": "lyttelton2007unbiased"
+ },
+ {
+ "citation": "Vladimir S Fonov, Alan C Evans, Robert C McKinstry, C Robert Almli, and DL Collins. Unbiased nonlinear average age-appropriate brain templates from birth to adulthood. NeuroImage, 47:S102, 2009.",
+ "bibkey": "fonov2009unbiased"
+ },
+ {
+ "citation": "Y Ad-Dab'bagh, O Lyttelton, J Muehlboeck, C Lepage, D Einarson, K Mok, O Ivanov, R Vincent, J Lerch, and E Fombonne. The civet image-processing environment: a fully automated comprehensive pipeline for anatomical neuroimaging research. proceedings of the 12th annual meeting of the organization for human brain mapping. Florence, Italy, pages 2266, 2006.",
+ "bibkey": "ad2006civet"
+ },
+ {
+ "citation": "Ross D Markello, Justine Y Hansen, Zhen-Qi Liu, Vincent Bazinet, Golia Shafiei, Laura E Su\u00e1rez, Nadia Blostein, Jakob Seidlitz, Sylvain Baillet, Theodore D Satterthwaite, and others. Neuromaps: structural and functional interpretation of brain maps. Nature Methods, 19(11):1472\u20131479, 2022.",
+ "bibkey": "markello2022neuromaps"
+ }
+ ]
+ },
+ "ds-famous_gmat": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "celegans": [
+ {
+ "citation": "Lav R Varshney, Beth L Chen, Eric Paniagua, David H Hall, and Dmitri B Chklovskii. Structural properties of the caenorhabditis elegans neuronal network. PLoS computational biology, 7(2):e1001066, 2011.",
+ "bibkey": "varshney2011structural"
+ }
+ ],
+ "drosophila": [
+ {
+ "citation": "Ann-Shyn Chiang, Chih-Yung Lin, Chao-Chun Chuang, Hsiu-Ming Chang, Chang-Huain Hsieh, Chang-Wei Yeh, Chi-Tin Shih, Jian-Jheng Wu, Guo-Tzau Wang, Yung-Chang Chen, and others. Three-dimensional reconstruction of brain-wide wiring networks in drosophila at single-cell resolution. Current biology, 21(1):1\u201311, 2011.",
+ "bibkey": "chiang2011three"
+ }
+ ],
+ "human": [
+ {
+ "citation": "Alessandra Griffa, Yasser Alem\u00e1n-G\u00f3mez, and Patric Hagmann. Structural and functional connectome from 70 young healthy adults [data set]. Zenodo, 2019.",
+ "bibkey": "griffa2019lausanne"
+ }
+ ],
+ "macaque_markov": [
+ {
+ "citation": "Nikola T Markov, Maria Ercsey-Ravasz, Camille Lamy, Ana Rita Ribeiro Gomes, Lo\u00efc Magrou, Pierre Misery, Pascale Giroud, Pascal Barone, Colette Dehay, Zolt\u00e1n Toroczkai, and others. The role of long-range connections on the specificity of the macaque interareal cortical network. Proceedings of the National Academy of Sciences, 110(13):5187\u20135192, 2013.",
+ "bibkey": "markov2013role"
+ }
+ ],
+ "macaque_modha": [
+ {
+ "citation": "Dharmendra S Modha and Raghavendra Singh. Network architecture of the long-distance pathways in the macaque brain. Proceedings of the National Academy of Sciences, 107(30):13485\u201313490, 2010.",
+ "bibkey": "modha2010network"
+ }
+ ],
+ "mouse": [
+ {
+ "citation": "Mikail Rubinov, Rolf JF Ypma, Charles Watson, and Edward T Bullmore. Wiring cost and topological participation of the mouse brain connectome. Proceedings of the National Academy of Sciences, 112(32):10032\u201310037, 2015.",
+ "bibkey": "rubinov2015wiring"
+ }
+ ],
+ "rat": [
+ {
+ "citation": "Mihail Bota, Olaf Sporns, and Larry W Swanson. Architecture of the cerebral cortical association connectome underlying cognition. Proceedings of the National Academy of Sciences, 112(16):E2093\u2013E2101, 2015.",
+ "bibkey": "bota2015architecture"
+ }
+ ]
+ },
+ "ds-vazquez_rodriguez2019": {
+ "primary": [
+ {
+ "citation": "Bertha V\u00e1zquez-Rodr\u00edguez, Laura E Su\u00e1rez, Ross D Markello, Golia Shafiei, Casey Paquola, Patric Hagmann, Martijn P Van Den Heuvel, Boris C Bernhardt, R Nathan Spreng, and Bratislav Misic. Gradients of structure\u2013function tethering across neocortex. Proceedings of the National Academy of Sciences, 116(42):21219\u201321227, 2019.",
+ "bibkey": "vazquez2019gradients"
+ }
+ ]
+ },
+ "atl-schaefer2018": {
+ "primary": [
+ {
+ "citation": "Alexander Schaefer, Ru Kong, Evan M Gordon, Timothy O Laumann, Xi-Nian Zuo, Avram J Holmes, Simon B Eickhoff, and BT Thomas Yeo. Local-global parcellation of the human cerebral cortex from intrinsic functional connectivity mri. Cerebral cortex, 28(9):3095\u20133114, 2018.",
+ "bibkey": "schaefer2018local"
+ }
+ ]
+ },
+ "atl-mmpall": {
+ "primary": [
+ {
+ "citation": "Matthew F Glasser, Timothy S Coalson, Emma C Robinson, Carl D Hacker, John Harwell, Essa Yacoub, Kamil Ugurbil, Jesper Andersson, Christian F Beckmann, Mark Jenkinson, and others. A multi-modal parcellation of human cerebral cortex. Nature, 536(7615):171\u2013178, 2016.",
+ "bibkey": "glasser2016multi"
+ }
+ ]
+ },
+ "atl-voneconomo_koskinas": {
+ "primary": [
+ {
+ "citation": "Lianne H Scholtens, Marcel A de Reus, Siemon C de Lange, Ruben Schmidt, and Martijn P van den Heuvel. An mri von economo\u2013koskinas atlas. NeuroImage, 170:249\u2013256, 2018.",
+ "bibkey": "scholtens2018mri"
+ }
+ ]
+ },
+ "ds-hansen_manynetworks": {
+ "primary": [
+ {
+ "citation": "Justine Y Hansen, Golia Shafiei, Katharina Voigt, Emma X Liang, Sylvia ML Cox, Marco Leyton, Sharna D Jamadar, and Bratislav Misic. Integrating multimodal and multiscale connectivity blueprints of the human cerebral cortex in health and disease. PLoS biology, 21(9):e3002314, 2023.",
+ "bibkey": "hansen2023integrating"
+ }
+ ],
+ "gene": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "receptor": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "larminar": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "metabolic": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "haemodynamic": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "electrophysiological": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "temporal": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ],
+ "cognitive": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ },
+ "ds-hansen_receptors": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ },
+ "ds-hansen_genescognition": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ },
+ "ds-hansen_brainstemfc": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ },
+ "ds-shafiei_megfmrimapping": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ },
+ "ds-shafiei_megdynamics": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ },
+ "ds-suarez_mami": {
+ "primary": [
+ {
+ "citation": "",
+ "bibkey": ""
+ }
+ ]
+ }
+}
\ No newline at end of file
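The new references.json maps each dataset identifier to one or more citation records, each pairing a formatted citation string with its BibTeX key in netneurotools.bib. As a minimal sketch (not part of the package API), the file could be queried like this, assuming it ships inside the installed package at the location declared in MANIFEST.in:

    import json
    from importlib.resources import files

    # locate the packaged references.json (path assumed from MANIFEST.in)
    refs_text = (files("netneurotools") / "datasets" / "references.json").read_text()
    references = json.loads(refs_text)

    # print the primary citation(s) recorded for the Cammoun et al., 2012 atlas
    for entry in references["atl-cammoun2012"]["primary"]:
        print(entry["bibkey"], "->", entry["citation"])
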
diff --git a/netneurotools/tests/__init__.py b/netneurotools/datasets/tests/__init__.py
similarity index 100%
rename from netneurotools/tests/__init__.py
rename to netneurotools/datasets/tests/__init__.py
diff --git a/netneurotools/datasets/tests/test_datasets_utils.py b/netneurotools/datasets/tests/test_datasets_utils.py
new file mode 100644
index 0000000..7ecb0e8
--- /dev/null
+++ b/netneurotools/datasets/tests/test_datasets_utils.py
@@ -0,0 +1,34 @@
+"""For testing netneurotools.datasets.datasets_utils functionality."""
+import os
+
+import pytest
+
+from netneurotools.datasets import datasets_utils as utils
+
+
+@pytest.mark.parametrize('dset, expected', [
+ ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k',
+ 'MNI152NLin2009aSym', 'gcs']),
+ ('tpl-conte69', ['url', 'md5']),
+ ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]),
+ ('atl-schaefer2018', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'])
+])
+def test_get_dataset_info(dset, expected):
+ """Test getting dataset info."""
+ info = utils._get_dataset_info(dset)
+ if isinstance(info, dict):
+ assert all(k in info.keys() for k in expected)
+ elif isinstance(info, list):
+ for f in info:
+ assert all(k in f.keys() for k in expected)
+ else:
+ assert False
+
+ with pytest.raises(KeyError):
+ utils._get_dataset_info('notvalid')
+
+
+def test_get_data_dir(tmpdir):
+ """Test getting data directory."""
+ data_dir = utils._get_data_dir(tmpdir)
+ assert os.path.isdir(data_dir)
diff --git a/netneurotools/datasets/tests/test_fetch.py b/netneurotools/datasets/tests/test_fetch.py
new file mode 100644
index 0000000..0447a0c
--- /dev/null
+++ b/netneurotools/datasets/tests/test_fetch.py
@@ -0,0 +1,268 @@
+"""For testing netneurotools.datasets.fetch_* functionality."""
+
+import os
+import pytest
+from pathlib import Path
+import numpy as np
+from netneurotools import datasets
+
+
+class TestFetchTemplate:
+ """Test fetching of template datasets."""
+
+ @pytest.mark.parametrize(
+ "version", ["fsaverage", "fsaverage3", "fsaverage4", "fsaverage5", "fsaverage6"]
+ )
+ def test_fetch_fsaverage(self, tmpdir, version):
+ """Test fetching of fsaverage surfaces."""
+ fsaverage = datasets.fetch_fsaverage(
+ version=version, data_dir=tmpdir, verbose=0
+ )
+ for k in ["orig", "white", "smoothwm", "pial", "inflated", "sphere"]:
+ assert k in fsaverage
+ assert fsaverage[k].L.exists()
+ assert fsaverage[k].R.exists()
+
+ @pytest.mark.parametrize(
+ "version", ["fsaverage", "fsaverage4", "fsaverage5", "fsaverage6"]
+ )
+ def test_fetch_fsaverage_curated(self, tmpdir, version):
+ """Test fetching of curated fsaverage surfaces."""
+ fsaverage = datasets.fetch_fsaverage_curated(
+ version=version, data_dir=tmpdir, verbose=0
+ )
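+ # besides surface geometry, the curated bundle should include medial-wall,
+ # sulcal-depth ('sulc'), and average vertex-area ('vaavg') maps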
+ for k in ["white", "pial", "inflated", "sphere", "medial", "sulc", "vaavg"]:
+ assert k in fsaverage
+ assert fsaverage[k].L.exists()
+ assert fsaverage[k].R.exists()
+
+ def test_fetch_hcp_standards(self, tmpdir):
+ """Test fetching of HCP standard meshes."""
+ hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0)
+ assert hcp.exists()
+
+ @pytest.mark.parametrize("version", ["fslr4k", "fslr8k", "fslr32k", "fslr164k"])
+ def test_fetch_fslr_curated(self, tmpdir, version):
+ """Test fetching of curated fsLR surfaces."""
+ fslr = datasets.fetch_fslr_curated(
+ version=version, data_dir=tmpdir, verbose=0
+ )
+ for k in [
+ "midthickness",
+ "inflated",
+ "veryinflated",
+ "sphere",
+ "medial",
+ "sulc",
+ "vaavg",
+ ]:
+ if version in ["fslr4k", "fslr8k"] and k == "veryinflated":
+ continue
+ assert k in fslr
+ assert fslr[k].L.exists()
+ assert fslr[k].R.exists()
+
+ @pytest.mark.parametrize("version", ["v1", "v2"])
+ def test_fetch_civet(self, tmpdir, version):
+ """Test fetching of CIVET templates."""
+ civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0)
+ for key in ("mid", "white"):
+ assert key in civet
+ assert civet[key].L.exists()
+ assert civet[key].R.exists()
+
+ @pytest.mark.parametrize("version", ["civet41k", "civet164k"])
+ def test_fetch_civet_curated(self, tmpdir, version):
+ """Test fetching of curated CIVET templates."""
+ civet = datasets.fetch_civet_curated(
+ version=version, data_dir=tmpdir, verbose=0
+ )
+ for k in [
+ "white",
+ "midthickness",
+ "inflated",
+ "veryinflated",
+ "sphere",
+ "medial",
+ "sulc",
+ "vaavg",
+ ]:
+ assert k in civet
+ assert civet[k].L.exists()
+ assert civet[k].R.exists()
+
+ def test_fetch_conte69(self, tmpdir):
+ """Test fetching of Conte69 surfaces."""
+ conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0)
+ assert all(
+ hasattr(conte, k) for k in ["midthickness", "inflated", "vinflated", "info"]
+ )
+
+ def test_fetch_yerkes19(self, tmpdir):
+ """Test fetching of Yerkes19 surfaces."""
+ yerkes19 = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0)
+ assert all(
+ hasattr(yerkes19, k) for k in ["midthickness", "inflated", "vinflated"]
+ )
+
+
+class TestFetchAtlas:
+ """Test fetching of atlas datasets."""
+
+ @pytest.mark.parametrize(
+ "version, expected",
+ [
+ ("MNI152NLin2009aSym", [1, 1, 1, 1, 1]),
+ ("fsaverage", [2, 2, 2, 2, 2]),
+ ("fsaverage5", [2, 2, 2, 2, 2]),
+ ("fsaverage6", [2, 2, 2, 2, 2]),
+ ("fslr32k", [2, 2, 2, 2, 2]),
+ ("gcs", [2, 2, 2, 2, 6]),
+ ],
+ )
+ def test_fetch_cammoun2012(self, tmpdir, version, expected):
+ """Test fetching of Cammoun2012 parcellations."""
+ keys = ["scale033", "scale060", "scale125", "scale250", "scale500"]
+ cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0)
+
+ # output has expected keys
+ assert all(hasattr(cammoun, k) for k in keys)
+ # and keys are expected lengths!
+ for k, e in zip(keys, expected):
+ out = getattr(cammoun, k)
+ if isinstance(out, (tuple, list)):
+ assert len(out) == e
+ else:
+ assert isinstance(out, Path) and str(out).endswith(".nii.gz")
+
+ @pytest.mark.parametrize(
+ "version", ["fsaverage", "fsaverage5", "fsaverage6", "fslr32k"]
+ )
+ def test_fetch_schaefer2018(self, tmpdir, version):
+ """Test fetching of Schaefer2018 parcellations."""
+ keys = [
+ f"{p}Parcels{n}Networks" for p in range(100, 1001, 100) for n in [7, 17]
+ ]
+ schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0)
+
+ if version == "fslr32k":
+ assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys)
+ else:
+ for k in keys:
+ assert k in schaefer
+ assert len(schaefer[k]) == 2
+ assert all(os.path.isfile(hemi) for hemi in schaefer[k])
+
+ def test_fetch_mmpall(self, tmpdir):
+ """Test fetching of MMPAll parcellations."""
+ mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0)
+ assert len(mmp) == 2
+ assert all(os.path.isfile(hemi) for hemi in mmp)
+ assert all(hasattr(mmp, attr) for attr in ("L", "R"))
+
+ def test_fetch_pauli2018(self, tmpdir):
+ """Test fetching of Pauli2018 parcellations."""
+ pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0)
+ assert all(
+ hasattr(pauli, k) and os.path.isfile(pauli[k])
+ for k in ["probabilistic", "deterministic", "info"]
+ )
+
+ @pytest.mark.xfail
+ def test_fetch_ye2020(self, tmpdir):
+ """Test fetching of Ye2020 parcellations."""
+ assert False
+
+ def test_fetch_voneconomo(self, tmpdir):
+ """Test fetching of von Economo parcellations."""
+ vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0)
+ assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ["gcs", "ctab"])
+ assert isinstance(vek.get("info"), Path)
+
+
+class TestFetchProject:
+ """Test fetching of project datasets."""
+
+ def test_fetch_vazquez_rodriguez2019(self, tmpdir):
+ """Test fetching of Vazquez-Rodriguez2019 dataset."""
+ vazquez = datasets.fetch_vazquez_rodriguez2019(data_dir=tmpdir, verbose=0)
+ for k in ["rsquared", "gradient"]:
+ assert hasattr(vazquez, k)
+ assert isinstance(getattr(vazquez, k), np.ndarray)
+
+ @pytest.mark.xfail
+ def test_fetch_mirchi2018(self, tmpdir):
+ """Test fetching of Mirchi2018 dataset."""
+ X, Y = datasets.fetch_mirchi2018(data_dir=tmpdir, verbose=0)
+ assert isinstance(X, np.ndarray)
+ assert X.shape == (73, 198135)
+ assert isinstance(Y, np.ndarray)
+ assert Y.shape == (73, 13)
+
+ def test_fetch_hansen_manynetworks(self, tmpdir):
+ """Test fetching of Hansen et al., 2023 many-networks dataset."""
+ hansen = datasets.fetch_hansen_manynetworks(data_dir=tmpdir, verbose=0)
+ assert hansen.exists()
+ # assert "cammoun033" in hansen
+ # assert "gene" in hansen["cammoun033"]
+ # assert isinstance(hansen["cammoun033"]["gene"], Path)
+
+ def test_fetch_hansen_receptors(self, tmpdir):
+ """Test fetching of Hansen et al., 2022 receptor dataset."""
+ hansen = datasets.fetch_hansen_receptors(data_dir=tmpdir, verbose=0)
+ assert hansen.exists()
+
+ def test_fetch_hansen_genescognition(self, tmpdir):
+ """Test fetching of Hansen et al., 2021 gene-cognition dataset."""
+ hansen = datasets.fetch_hansen_genescognition(data_dir=tmpdir, verbose=0)
+ assert hansen.exists()
+
+ def test_fetch_hansen_brainstemfc(self, tmpdir):
+ """Test fetching of Hansen et al., 2024 brainstem dataset."""
+ hansen = datasets.fetch_hansen_brainstemfc(data_dir=tmpdir, verbose=0)
+ assert hansen.exists()
+
+ def test_fetch_shafiei_megfmrimapping(self, tmpdir):
+ """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset."""
+ shafiei = datasets.fetch_shafiei_megfmrimapping(data_dir=tmpdir, verbose=0)
+ assert shafiei.exists()
+
+ def test_fetch_shafiei_megdynamics(self, tmpdir):
+ """Test fetching of Shafiei et al., 2022 & 2023 HCP-MEG dataset."""
+ shafiei = datasets.fetch_shafiei_megdynamics(data_dir=tmpdir, verbose=0)
+ assert shafiei.exists()
+
+ def test_fetch_suarez_mami(self, tmpdir):
+ """Test fetching of Suarez et al., 2022 mami dataset."""
+ suarez = datasets.fetch_suarez_mami(data_dir=tmpdir, verbose=0)
+ assert suarez.exists()
+
+ @pytest.mark.parametrize(
+ "dataset, expected",
+ [
+ ("celegans", ["conn", "dist", "labels", "ref"]),
+ ("drosophila", ["conn", "coords", "labels", "networks", "ref"]),
+ ("human_func_scale033", ["conn", "coords", "labels", "ref"]),
+ ("human_func_scale060", ["conn", "coords", "labels", "ref"]),
+ ("human_func_scale125", ["conn", "coords", "labels", "ref"]),
+ ("human_func_scale250", ["conn", "coords", "labels", "ref"]),
+ ("human_func_scale500", ["conn", "coords", "labels", "ref"]),
+ ("human_struct_scale033", ["conn", "coords", "dist", "labels", "ref"]),
+ ("human_struct_scale060", ["conn", "coords", "dist", "labels", "ref"]),
+ ("human_struct_scale125", ["conn", "coords", "dist", "labels", "ref"]),
+ ("human_struct_scale250", ["conn", "coords", "dist", "labels", "ref"]),
+ ("human_struct_scale500", ["conn", "coords", "dist", "labels", "ref"]),
+ ("macaque_markov", ["conn", "dist", "labels", "ref"]),
+ ("macaque_modha", ["conn", "coords", "dist", "labels", "ref"]),
+ ("mouse", ["acronyms", "conn", "coords", "dist", "labels", "ref"]),
+ ("rat", ["conn", "labels", "ref"]),
+ ],
+ )
+ def test_fetch_famous_gmat(self, tmpdir, dataset, expected):
+ """Test fetching of famous G.mat datasets."""
+ connectome = datasets.fetch_famous_gmat(dataset, data_dir=tmpdir, verbose=0)
+
+ expected.remove("ref")
+ for key in expected:
+ assert key in connectome
+ assert isinstance(connectome[key], str if key == "ref" else np.ndarray)
diff --git a/netneurotools/datasets/transforms.py b/netneurotools/datasets/transforms.py
new file mode 100644
index 0000000..120a8b0
--- /dev/null
+++ b/netneurotools/datasets/transforms.py
@@ -0,0 +1 @@
+"""Transforms for datasets."""
diff --git a/netneurotools/datasets/utils.py b/netneurotools/datasets/utils.py
deleted file mode 100644
index 4339c57..0000000
--- a/netneurotools/datasets/utils.py
+++ /dev/null
@@ -1,100 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Utilites for loading / creating datasets."""
-
-import json
-import os
-import importlib.resources
-
-if getattr(importlib.resources, 'files', None) is not None:
- _importlib_avail = True
-else:
- from pkg_resources import resource_filename
- _importlib_avail = False
-
-
-def _osfify_urls(data):
- """
- Format `data` object with OSF API URL.
-
- Parameters
- ----------
- data : object
- If dict with a `url` key, will format OSF_API with relevant values
-
- Returns
- -------
- data : object
- Input data with all `url` dict keys formatted
- """
- OSF_API = "https://files.osf.io/v1/resources/{}/providers/osfstorage/{}"
-
- if isinstance(data, str):
- return data
- elif 'url' in data:
- data['url'] = OSF_API.format(*data['url'])
-
- try:
- for key, value in data.items():
- data[key] = _osfify_urls(value)
- except AttributeError:
- for n, value in enumerate(data):
- data[n] = _osfify_urls(value)
-
- return data
-
-
-if _importlib_avail:
- osf = importlib.resources.files("netneurotools") / "data/osf.json"
-else:
- osf = resource_filename('netneurotools', 'data/osf.json')
-
-with open(osf) as src:
- OSF_RESOURCES = _osfify_urls(json.load(src))
-
-
-def _get_dataset_info(name):
- """
- Return url and MD5 checksum for dataset `name`.
-
- Parameters
- ----------
- name : str
- Name of dataset
-
- Returns
- -------
- url : str
- URL from which to download dataset
- md5 : str
- MD5 checksum for file downloade from `url`
- """
- try:
- return OSF_RESOURCES[name]
- except KeyError:
- raise KeyError("Provided dataset '{}' is not valid. Must be one of: {}"
- .format(name, sorted(OSF_RESOURCES.keys()))) from None
-
-
-def _get_data_dir(data_dir=None):
- """
- Get path to netneurotools data directory.
-
- Parameters
- ----------
- data_dir : str, optional
- Path to use as data directory. If not specified, will check for
- environmental variable 'NNT_DATA'; if that is not set, will use
- `~/nnt-data` instead. Default: None
-
- Returns
- -------
- data_dir : str
- Path to use as data directory
- """
- if data_dir is None:
- data_dir = os.environ.get('NNT_DATA', os.path.join('~', 'nnt-data'))
- data_dir = os.path.expanduser(data_dir)
- if not os.path.exists(data_dir):
- os.makedirs(data_dir)
-
- return data_dir
diff --git a/netneurotools/experimental/__init__.py b/netneurotools/experimental/__init__.py
new file mode 100644
index 0000000..911c0f7
--- /dev/null
+++ b/netneurotools/experimental/__init__.py
@@ -0,0 +1,4 @@
+"""Functions in alpha stage."""
+
+
+__all__ = []
diff --git a/netneurotools/freesurfer.py b/netneurotools/freesurfer.py
deleted file mode 100644
index 047590d..0000000
--- a/netneurotools/freesurfer.py
+++ /dev/null
@@ -1,662 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Functions for working with FreeSurfer data and parcellations."""
-
-import os
-import os.path as op
-import warnings
-
-from nibabel.freesurfer import read_annot, read_geometry
-import numpy as np
-from scipy import sparse
-try: # scipy >= 1.8.0
- from scipy.ndimage._measurements import _stats, labeled_comprehension
-except ImportError: # scipy < 1.8.0
- from scipy.ndimage.measurements import _stats, labeled_comprehension
-from scipy.spatial.distance import cdist
-
-from .datasets import fetch_fsaverage
-from .stats import gen_spinsamples
-from .surface import make_surf_graph
-from .utils import check_fs_subjid, run
-
-FSIGNORE = [
- 'unknown', 'corpuscallosum', 'Background+FreeSurfer_Defined_Medial_Wall'
-]
-
-
-def apply_prob_atlas(subject_id, gcs, hemi, *, orig='white', annot=None,
- ctab=None, subjects_dir=None, use_cache=True,
- quiet=False):
- """
- Create an annotation file for `subject_id` by applying atlas in `gcs`.
-
- Runs subprocess calling FreeSurfer's "mris_ca_label" function; as such,
- FreeSurfer must be installed and accesible on the local system path.
-
- Parameters
- ----------
- subject_id : str
- FreeSurfer subject ID
- gcs : str
- Filepath to .gcs file containing classifier array
- hemi : {'lh', 'rh'}
- Hemisphere corresponding to `gcs` file
- orig : str, optional
- Original surface to which to apply classifer. Default: 'white'
- annot : str, optional
- Path to output annotation file to generate. If set to None, the name is
- created from the provided `hemi` and `gcs`. If provided as a
- relative path, it is assumed to stem from `subjects_dir`/`subject_id`.
- Default: None
- ctab : str, optional
- Path to colortable corresponding to `gcs`. Default: None
- subjects_dir : str, optional
- Path to FreeSurfer subject directory. If not set, will inherit from
- the environmental variable $SUBJECTS_DIR. Default: None
- use_cache : bool, optional
- Whether to check for existence of `annot` in directory specified by
- `{subjects_dir}/{subject_id}/label' and use that, if it exists. If
- False, will create a new annot file. Default: True
- quiet : bool, optional
- Whether to restrict status messages. Default: False
-
- Returns
- -------
- annot : str
- Path to generated annotation file
- """
- cmd = 'mris_ca_label {opts}{subject_id} {hemi} {hemi}.sphere.reg ' \
- '{gcs} {annot}'
-
- if hemi not in ['rh', 'lh']:
- raise ValueError('Provided hemisphere designation `hemi` must be one '
- 'of \'rh\' or \'lh\'. Provided: {}'.format(hemi))
- if not op.isfile(gcs):
- raise ValueError('Cannot find specified `gcs` file {}.'.format(gcs))
-
- subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir)
-
- # add all the options together, as specified
- opts = ''
- if ctab is not None and op.isfile(ctab):
- opts += '-t {} '.format(ctab)
- if orig is not None:
- opts += '-orig {} '.format(orig)
- if subjects_dir is not None:
- opts += '-sdir {} '.format(subjects_dir)
- else:
- subjects_dir = os.environ['SUBJECTS_DIR']
-
- # generate output filename
- if annot is None:
- base = '{}.{}.annot'.format(hemi, gcs[:-4])
- annot = op.join(subjects_dir, subject_id, 'label', base)
- else:
- # if not a full path, assume relative from subjects_dir/subject_id
- if not annot.startswith(op.abspath(os.sep)):
- annot = op.join(subjects_dir, subject_id, annot)
-
- # if annotation file doesn't exist or we explicitly want to make a new one
- if not op.isfile(annot) or not use_cache:
- run(cmd.format(opts=opts, subject_id=subject_id, hemi=hemi,
- gcs=gcs, annot=annot),
- quiet=quiet)
-
- return annot
-
-
-def _decode_list(vals):
- """List decoder."""
- return [val.decode() if hasattr(val, 'decode') else val for val in vals]
-
-
-def find_parcel_centroids(*, lhannot, rhannot, method='surface',
- version='fsaverage', surf='sphere', drop=None):
- """
- Return vertex coords corresponding to centroids of parcels in annotations.
-
- Note that using any other `surf` besides the default of 'sphere' may result
- in centroids that are not directly within the parcels themselves due to
- sulcal folding patterns.
-
- Parameters
- ----------
- {lh,rh}annot : str
- Path to .annot file containing labels of parcels on the {left,right}
- hemisphere. These must be specified as keyword arguments to avoid
- accidental order switching.
- method : {'average', 'surface', 'geodesic'}, optional
- Method for calculation of parcel centroid. See Notes for more
- information. Default: 'surface'
- version : str, optional
- Specifies which version of `fsaverage` provided annotation files
- correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4',
- 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage'
- surf : str, optional
- Specifies which surface projection of fsaverage to use for finding
- parcel centroids. Default: 'sphere'
- drop : list, optional
- Specifies regions in {lh,rh}annot for which the parcel centroid should
- not be calculated. If not specified, centroids for parcels defined in
- `netneurotools.freesurfer.FSIGNORE` are not calculated. Default: None
-
- Returns
- -------
- centroids : (N, 3) numpy.ndarray
- xyz coordinates of vertices closest to the centroid of each parcel
- defined in `lhannot` and `rhannot`
- hemiid : (N,) numpy.ndarray
- Array denoting hemisphere designation of coordinates in `centroids`,
- where `hemiid=0` denotes the left and `hemiid=1` the right hemisphere
-
- Notes
- -----
- The following methods can be used for finding parcel centroids:
-
- 1. ``method='average'``
-
- Uses the arithmetic mean of the coordinates for the vertices in each
- parcel. Note that in this case the calculated centroids will not act
- actually fall on the surface of `surf`.
-
- 2. ``method='surface'``
-
- Calculates the 'average' coordinates and then finds the closest vertex
- on `surf`, where closest is defined as the vertex with the minimum
- Euclidean distance.
-
- 3. ``method='geodesic'``
-
- Uses the coordinates of the vertex with the minimum average geodesic
- distance to all other vertices in the parcel. Note that this is slightly
- more time-consuming than the other two methods, especially for
- high-resolution meshes.
- """
- methods = ['average', 'surface', 'geodesic']
- if method not in methods:
- raise ValueError('Provided method for centroid calculation {} is '
- 'invalid. Must be one of {}'.format(methods, methods))
-
- if drop is None:
- drop = FSIGNORE
- drop = _decode_list(drop)
-
- surfaces = fetch_fsaverage(version)[surf]
-
- centroids, hemiid = [], []
- for n, (annot, surf) in enumerate(zip([lhannot, rhannot], surfaces)):
- vertices, faces = read_geometry(surf)
- labels, ctab, names = read_annot(annot)
- names = _decode_list(names)
-
- for lab in np.unique(labels):
- if names[lab] in drop:
- continue
- if method in ['average', 'surface']:
- roi = np.atleast_2d(vertices[labels == lab].mean(axis=0))
- if method == 'surface': # find closest vertex on the sphere
- roi = vertices[np.argmin(cdist(vertices, roi), axis=0)[0]]
- elif method == 'geodesic':
- inds, = np.where(labels == lab)
- roi = _geodesic_parcel_centroid(vertices, faces, inds)
- centroids.append(roi)
- hemiid.append(n)
-
- return np.vstack(centroids), np.asarray(hemiid)
-
-
-def _geodesic_parcel_centroid(vertices, faces, inds):
- """
- Calculate parcel centroids based on surface distance.
-
- Parameters
- ----------
- vertices : (N, 3)
- Coordinates of vertices defining surface
- faces : (F, 3)
- Triangular faces defining surface
- inds : (R,)
- Indices of `vertices` that belong to parcel
-
- Returns
- -------
- roi : (3,) numpy.ndarray
- Vertex corresponding to centroid of parcel
- """
- mask = np.ones(len(vertices), dtype=bool)
- mask[inds] = False
- mat = make_surf_graph(vertices, faces, mask=mask)
- paths = sparse.csgraph.dijkstra(mat, directed=False, indices=inds)[:, inds]
-
- # the selected vertex is the one with the minimum average shortest path
- # to the other vertices in the parcel
- roi = vertices[inds[paths.mean(axis=1).argmin()]]
-
- return roi
-
-
-def parcels_to_vertices(data, *, lhannot, rhannot, drop=None):
- """
- Project parcellated `data` to vertices defined in annotation files.
-
- Assigns np.nan to all ROIs in `drop`
-
- Parameters
- ----------
- data : (N,) numpy.ndarray
- Parcellated data to be projected to vertices. Parcels should be ordered
- by [left, right] hemisphere; ordering within hemisphere should
- correspond to the provided annotation files.
- {lh,rh}annot : str
- Path to .annot file containing labels of parcels on the {left,right}
- hemisphere. These must be specified as keyword arguments to avoid
- accidental order switching.
- drop : list, optional
- Specifies regions in {lh,rh}annot that are not present in `data`. NaNs
- will be inserted in place of the these regions in the returned data. If
- not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE`
- are assumed to not be present. Default: None
-
- Returns
- -------
- projected : numpy.ndarray
- Vertex-level data
- """
- if drop is None:
- drop = FSIGNORE
- drop = _decode_list(drop)
-
- data = np.vstack(data).astype(float)
-
- # check this so we're not unduly surprised by anything...
- n_vert = expected = 0
- for a in [lhannot, rhannot]:
- vn, _, names = read_annot(a)
- n_vert += len(vn)
- names = _decode_list(names)
- expected += len(names) - len(set(drop) & set(names))
- if expected != len(data):
- raise ValueError('Number of parcels in provided annotation files '
- 'differs from size of parcellated data array.\n'
- ' EXPECTED: {} parcels\n'
- ' RECEIVED: {} parcels'
- .format(expected, len(data)))
-
- projected = np.zeros((n_vert, data.shape[-1]), dtype=data.dtype)
- start = end = n_vert = 0
- for annot in [lhannot, rhannot]:
- # read files and update end index for `data`
- labels, ctab, names = read_annot(annot)
- names = _decode_list(names)
- todrop = set(names) & set(drop)
- end += len(names) - len(todrop) # unknown and corpuscallosum
-
- # get indices of unknown and corpuscallosum and insert NaN values
- inds = sorted([names.index(f) for f in todrop])
- inds = [f - n for n, f in enumerate(inds)]
- currdata = np.insert(data[start:end], inds, np.nan, axis=0)
-
- # project to vertices and store
- projected[n_vert:n_vert + len(labels), :] = currdata[labels]
- start = end
- n_vert += len(labels)
-
- return np.squeeze(projected)
-
-
-def vertices_to_parcels(data, *, lhannot, rhannot, drop=None):
- """
- Reduce vertex-level `data` to parcels defined in annotation files.
-
- Takes average of vertices within each parcel, excluding np.nan values
- (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are
- np.nan.
-
- Parameters
- ----------
- data : (N,) numpy.ndarray
- Vertex-level data to be reduced to parcels
- {lh,rh}annot : str
- Path to .annot file containing labels to parcels on the {left,right}
- hemisphere
- drop : list, optional
- Specifies regions in {lh,rh}annot that should be removed from the
- parcellated version of `data`. If not specified, vertices corresponding
- to parcels defined in `netneurotools.freesurfer.FSIGNORE` will be
- removed. Default: None
-
- Returns
- -------
- reduced : numpy.ndarray
- Parcellated `data`, without regions specified in `drop`
- """
- if drop is None:
- drop = FSIGNORE
- drop = _decode_list(drop)
-
- data = np.vstack(data)
-
- n_parc = expected = 0
- for a in [lhannot, rhannot]:
- vn, _, names = read_annot(a)
- expected += len(vn)
- names = _decode_list(names)
- n_parc += len(names) - len(set(drop) & set(names))
- if expected != len(data):
- raise ValueError('Number of vertices in provided annotation files '
- 'differs from size of vertex-level data array.\n'
- ' EXPECTED: {} vertices\n'
- ' RECEIVED: {} vertices'
- .format(expected, len(data)))
-
- reduced = np.zeros((n_parc, data.shape[-1]), dtype=data.dtype)
- start = end = n_parc = 0
- for annot in [lhannot, rhannot]:
- # read files and update end index for `data`
- labels, ctab, names = read_annot(annot)
- names = _decode_list(names)
-
- indices = np.unique(labels)
- end += len(labels)
-
- for idx in range(data.shape[-1]):
- # get average of vertex-level data within parcels
- # set all NaN values to 0 before calling `_stats` because we are
- # returning sums, so the 0 values won't impact the sums (if we left
- # the NaNs then all parcels with even one NaN entry would be NaN)
- currdata = np.squeeze(data[start:end, idx])
- isna = np.isnan(currdata)
- counts, sums = _stats(np.nan_to_num(currdata), labels, indices)
-
- # however, we do need to account for the NaN values in the counts
- # so that our means are similar to what we'd get from e.g.,
- # np.nanmean here, our "sums" are the counts of NaN values in our
- # parcels
- _, nacounts = _stats(isna, labels, indices)
- counts = (np.asanyarray(counts, dtype=float)
- - np.asanyarray(nacounts, dtype=float))
-
- with np.errstate(divide='ignore', invalid='ignore'):
- currdata = sums / counts
-
- # get indices of unkown and corpuscallosum and delete from parcels
- inds = sorted([names.index(f) for f in set(drop) & set(names)])
- currdata = np.delete(currdata, inds)
-
- # store parcellated data
- reduced[n_parc:n_parc + len(names) - len(inds), idx] = currdata
-
- start = end
- n_parc += len(names) - len(inds)
-
- return np.squeeze(reduced)
-
-
-def _get_fsaverage_coords(version='fsaverage', surface='sphere'):
- """
- Get vertex coordinates for specified `surface` of fsaverage `version`.
-
- Parameters
- ----------
- version : str, optional
- One of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5',
- 'fsaverage6'}. Default: 'fsaverage'
- surface : str, optional
- Surface for which to return vertex coordinates. Default: 'sphere'
-
- Returns
- -------
- coords : (N, 3) numpy.ndarray
- xyz coordinates of vertices for {left,right} hemisphere
- hemiid : (N,) numpy.ndarray
- Array denoting hemisphere designation of entries in `coords`, where
- `hemiid=0` denotes the left and `hemiid=1` the right hemisphere
- """
- # get coordinates and hemisphere designation for spin generation
- lhsphere, rhsphere = fetch_fsaverage(version)[surface]
- coords, hemi = [], []
- for n, sphere in enumerate([lhsphere, rhsphere]):
- coords.append(read_geometry(sphere)[0])
- hemi.append(np.ones(len(coords[-1])) * n)
-
- return np.vstack(coords), np.hstack(hemi)
-
-
-def _get_fsaverage_spins(version='fsaverage', spins=None, n_rotate=1000,
- **kwargs):
- """
- Generate spatial permutation resamples for fsaverage `version`.
-
- If `spins` are provided then performs checks to confirm they are valid
-
- Parameters
- ----------
- version : str, optional
- Specifies which version of `fsaverage` for which to generate spins.
- Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5',
- 'fsaverage6'}. Default: 'fsaverage'
- spins : array_like, optional
- Pre-computed spins to use instead of generating them on the fly. If not
- provided will use other provided parameters to create them. Default:
- None
- n_rotate : int, optional
- Number of rotations to generate. Default: 1000
- return_cost : bool, optional
- Whether to return cost array (specified as Euclidean distance) for each
- coordinate for each rotation. Currently this option is not supported if
- pre-computed `spins` are provided. Default: True
- kwargs : key-value pairs
- Keyword arguments passed to `netneurotools.stats.gen_spinsamples`
-
- Returns
- -------
- spins : (N, S) numpy.ndarray
- Resampling array
- """
- if spins is None:
- coords, hemiid = _get_fsaverage_coords(version, 'sphere')
- spins = gen_spinsamples(coords, hemiid, n_rotate=n_rotate,
- **kwargs)
- if kwargs.get('return_cost'):
- return spins
-
- spins = np.asarray(spins, dtype='int32')
- if spins.shape[-1] != n_rotate:
- warnings.warn('Shape of provided `spins` array does not match '
- 'number of rotations requested with `n_rotate`. '
- 'Ignoring specified `n_rotate` parameter and using '
- 'all provided `spins`.', stacklevel=2)
- n_rotate = spins.shape[-1]
-
- return spins, None
-
-
-def spin_data(data, *, lhannot, rhannot, version='fsaverage', n_rotate=1000,
- spins=None, drop=None, verbose=False, **kwargs):
- """
- Project parcellated `data` to surface, rotates, and re-parcellates.
-
- Projection to the surface uses `{lh,rh}annot` files. Rotation uses vertex
- coordinates from the specified fsaverage `version` and relies on
- :func:`netneurotools.stats.gen_spinsamples`. Re-parcellated data will not
- be exactly identical to original values due to re-averaging process.
- Parcels subsumed by regions in `drop` will be listed as NaN.
-
- Parameters
- ----------
- data : (N,) numpy.ndarray
- Parcellated data to be rotated. Parcels should be ordered by [left,
- right] hemisphere; ordering within hemisphere should correspond to the
- provided `{lh,rh}annot` annotation files.
- {lh,rh}annot : str
- Path to .annot file containing labels to parcels on the {left,right}
- hemisphere
- version : str, optional
- Specifies which version of `fsaverage` provided annotation files
- correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4',
- 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage'
- n_rotate : int, optional
- Number of rotations to generate. Default: 1000
- spins : array_like, optional
- Pre-computed spins to use instead of generating them on the fly. If not
- provided will use other provided parameters to create them. Default:
- None
- drop : list, optional
- Specifies regions in {lh,rh}annot that are not present in `data`. NaNs
- will be inserted in place of the these regions in the returned data. If
- not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE`
- are assumed to not be present. Default: None
- verbose : bool, optional
- Whether to print occasional status messages. Default: False
- kwargs : key-value pairs
- Keyword arguments passed to `netneurotools.stats.gen_spinsamples`
-
- Returns
- -------
- rotated : (N, `n_rotate`) numpy.ndarray
- Rotated `data
- cost : (N, `n_rotate`,) numpy.ndarray
- Cost (specified as Euclidean distance) of re-assigning each coordinate
- for every rotation in `spinsamples`. Only provided if `return_cost` is
- True.
- """
- if drop is None:
- drop = FSIGNORE
-
- # get coordinates and hemisphere designation for spin generation
- vertices = parcels_to_vertices(data, lhannot=lhannot, rhannot=rhannot,
- drop=drop)
-
- # get spins + cost (if requested)
- spins, cost = _get_fsaverage_spins(version=version, spins=spins,
- n_rotate=n_rotate,
- verbose=verbose, **kwargs)
- if len(vertices) != len(spins):
- raise ValueError('Provided annotation files have a different '
- 'number of vertices than the specified fsaverage '
- 'surface.\n ANNOTATION: {} vertices\n '
- 'FSAVERAGE: {} vertices'
- .format(len(vertices), len(spins)))
-
- spun = np.zeros(data.shape + (n_rotate,))
- for n in range(n_rotate):
- if verbose:
- msg = f'Reducing vertices to parcels: {n:>5}/{n_rotate}'
- print(msg, end='\b' * len(msg), flush=True)
- spun[..., n] = vertices_to_parcels(vertices[spins[:, n]],
- lhannot=lhannot, rhannot=rhannot,
- drop=drop)
-
- if verbose:
- print(' ' * len(msg) + '\b' * len(msg), end='', flush=True)
-
- if kwargs.get('return_cost'):
- return spun, cost
-
- return spun
-
-
-def spin_parcels(*, lhannot, rhannot, version='fsaverage', n_rotate=1000,
- spins=None, drop=None, verbose=False, **kwargs):
- """
- Rotate parcels in `{lh,rh}annot` and re-assigns based on maximum overlap.
-
- Vertex labels are rotated with :func:`netneurotools.stats.gen_spinsamples`
- and a new label is assigned to each *parcel* based on the region maximally
- overlapping with its boundaries.
-
- Parameters
- ----------
- {lh,rh}annot : str
- Path to .annot file containing labels to parcels on the {left,right}
- hemisphere
- version : str, optional
- Specifies which version of `fsaverage` provided annotation files
- correspond to. Must be one of {'fsaverage', 'fsaverage3', 'fsaverage4',
- 'fsaverage5', 'fsaverage6'}. Default: 'fsaverage'
- n_rotate : int, optional
- Number of rotations to generate. Default: 1000
- spins : array_like, optional
- Pre-computed spins to use instead of generating them on the fly. If not
- provided will use other provided parameters to create them. Default:
- None
- drop : list, optional
- Specifies regions in {lh,rh}annot that are not present in `data`. NaNs
- will be inserted in place of the these regions in the returned data. If
- not specified, parcels defined in `netneurotools.freesurfer.FSIGNORE`
- are assumed to not be present. Default: None
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Default: None
- verbose : bool, optional
- Whether to print occasional status messages. Default: False
- return_cost : bool, optional
- Whether to return cost array (specified as Euclidean distance) for each
- coordinate for each rotation. Default: True
- kwargs : key-value pairs
- Keyword arguments passed to `netneurotools.stats.gen_spinsamples`
-
- Returns
- -------
- spinsamples : (N, `n_rotate`) numpy.ndarray
- Resampling matrix to use in permuting data parcellated with labels from
- {lh,rh}annot, where `N` is the number of parcels. Indices of -1
- indicate that the parcel was completely encompassed by regions in
- `drop` and should be ignored.
- cost : (N, `n_rotate`,) numpy.ndarray
- Cost (specified as Euclidean distance) of re-assigning each coordinate
- for every rotation in `spinsamples`. Only provided if `return_cost` is
- True.
- """
-
- def overlap(vals):
- """Return most common non-negative value in `vals`; -1 if all neg."""
- vals = np.asarray(vals)
- vals, counts = np.unique(vals[vals > 0], return_counts=True)
- try:
- return vals[counts.argmax()]
- except ValueError:
- return -1
-
- if drop is None:
- drop = FSIGNORE
- drop = _decode_list(drop)
-
- # get vertex-level labels (set drop labels to - values)
- vertices, end = [], 0
- for n, annot in enumerate([lhannot, rhannot]):
- labels, ctab, names = read_annot(annot)
- names = _decode_list(names)
- todrop = set(names) & set(drop)
- inds = [names.index(f) - n for n, f in enumerate(todrop)]
- labs = np.arange(len(names) - len(inds)) + (end - (len(inds) * n))
- insert = np.arange(-1, -(len(inds) + 1), -1)
- vertices.append(np.insert(labs, inds, insert)[labels])
- end += len(names)
- vertices = np.hstack(vertices)
- labels = np.unique(vertices)
- mask = labels > -1
-
- # get spins + cost (if requested)
- spins, cost = _get_fsaverage_spins(version=version, spins=spins,
- n_rotate=n_rotate, verbose=verbose,
- **kwargs)
- if len(vertices) != len(spins):
- raise ValueError('Provided annotation files have a different '
- 'number of vertices than the specified fsaverage '
- 'surface.\n ANNOTATION: {} vertices\n '
- 'FSAVERAGE: {} vertices'
- .format(len(vertices), len(spins)))
-
- # spin and assign regions based on max overlap
- regions = np.zeros((len(labels[mask]), n_rotate), dtype='int32')
- for n in range(n_rotate):
- if verbose:
- msg = f'Calculating parcel overlap: {n:>5}/{n_rotate}'
- print(msg, end='\b' * len(msg), flush=True)
- regions[:, n] = labeled_comprehension(vertices[spins[:, n]], vertices,
- labels, overlap, int, -1)[mask]
-
- if kwargs.get('return_cost'):
- return regions, cost
-
- return regions
diff --git a/netneurotools/interface/__init__.py b/netneurotools/interface/__init__.py
new file mode 100644
index 0000000..1b474b1
--- /dev/null
+++ b/netneurotools/interface/__init__.py
@@ -0,0 +1,3 @@
+"""Functions for interfacing with common tools."""
+
+__all__ = []
diff --git a/netneurotools/interface/freesurfer.py b/netneurotools/interface/freesurfer.py
new file mode 100644
index 0000000..9efc02f
--- /dev/null
+++ b/netneurotools/interface/freesurfer.py
@@ -0,0 +1 @@
+"""Functions for working with FreeSurfer data and parcellations."""
diff --git a/netneurotools/interface/tests/__init__.py b/netneurotools/interface/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/interface/tests/test_freesurfer.py b/netneurotools/interface/tests/test_freesurfer.py
new file mode 100644
index 0000000..fcdcd23
--- /dev/null
+++ b/netneurotools/interface/tests/test_freesurfer.py
@@ -0,0 +1 @@
+"""For testing netneurotools.interface.freesurfer functionality."""
diff --git a/netneurotools/metrics/__init__.py b/netneurotools/metrics/__init__.py
new file mode 100644
index 0000000..51d8181
--- /dev/null
+++ b/netneurotools/metrics/__init__.py
@@ -0,0 +1,66 @@
+"""Magics on networks."""
+
+
+from .bct import (
+ # routing
+ degrees_und, degrees_dir,
+ distance_wei_floyd, retrieve_shortest_path,
+ navigation_wu, get_navigation_path_length,
+ # diffusion
+ communicability_bin, communicability_wei,
+ path_transitivity, search_information,
+ mean_first_passage_time, diffusion_efficiency,
+ resource_efficiency_bin, flow_graph,
+ # other
+ assortativity,
+ matching_ind_und,
+ rich_feeder_peripheral
+)
+
+
+from .metrics_utils import (
+ _fast_binarize,
+ _graph_laplacian,
+)
+
+
+from .spreading import (
+ simulate_atrophy
+)
+
+
+from .statistical import (
+ network_pearsonr,
+ network_pearsonr_numba,
+ network_pearsonr_pairwise,
+ effective_resistance,
+ network_polarisation,
+ network_variance,
+ network_variance_numba,
+ network_covariance,
+ network_covariance_numba
+)
+
+
+__all__ = [
+ # bct
+ 'degrees_und', 'degrees_dir',
+ 'distance_wei_floyd', 'retrieve_shortest_path',
+ 'navigation_wu', 'get_navigation_path_length',
+ 'communicability_bin', 'communicability_wei',
+ 'path_transitivity', 'search_information',
+ 'mean_first_passage_time', 'diffusion_efficiency',
+ 'resource_efficiency_bin', 'flow_graph',
+ 'assortativity', 'matching_ind_und',
+ 'rich_feeder_peripheral',
+ # metrics_utils
+ '_fast_binarize', '_graph_laplacian',
+ # spreading
+ 'simulate_atrophy',
+ # statistical
+ 'network_pearsonr', 'network_pearsonr_numba',
+ 'network_pearsonr_pairwise', 'effective_resistance',
+ 'network_polarisation', 'network_variance',
+ 'network_variance_numba', 'network_covariance',
+ 'network_covariance_numba'
+]
diff --git a/netneurotools/metrics.py b/netneurotools/metrics/bct.py
similarity index 95%
rename from netneurotools/metrics.py
rename to netneurotools/metrics/bct.py
index 2b8abf2..3996350 100644
--- a/netneurotools/metrics.py
+++ b/netneurotools/metrics/bct.py
@@ -1,6 +1,5 @@
-# -*- coding: utf-8 -*-
"""
-Functions for calculating network metrics.
+Functions for calculating brain connectivity metrics.
Uses naming conventions adopted from the Brain Connectivity
Toolbox (https://sites.google.com/site/bctnet/).
@@ -18,26 +17,7 @@
except ImportError:
use_numba = False
-
-def _binarize(W):
- """
- Binarize a matrix.
-
- Parameters
- ----------
- W : (N, N) array_like
- Matrix to be binarized
-
- Returns
- -------
- binarized : (N, N) numpy.ndarray
- Binarized matrix
- """
- return (W > 0) * 1
-
-
-if use_numba:
- _binarize = njit(_binarize)
+from .metrics_utils import _fast_binarize
def degrees_und(W):
@@ -56,7 +36,7 @@ def degrees_und(W):
deg : (N,) numpy.ndarray
Degree of each node in `W`
"""
- return np.sum(_binarize(W), axis=0)
+ return np.sum(_fast_binarize(W), axis=0)
def degrees_dir(W):
@@ -78,7 +58,7 @@ def degrees_dir(W):
deg : (N,) numpy.ndarray
Degree (in-degree + out-degree) of each node in `W`
"""
- W_bin = _binarize(W)
+ W_bin = _fast_binarize(W)
deg_in = np.sum(W_bin, axis=0)
deg_out = np.sum(W_bin, axis=1)
deg = deg_in + deg_out
@@ -165,179 +145,6 @@ def retrieve_shortest_path(s, t, p_mat):
retrieve_shortest_path = njit(retrieve_shortest_path)
-def communicability_bin(adjacency, normalize=False):
- """
- Compute the communicability of pairs of nodes in `adjacency`.
-
- Parameters
- ----------
- adjacency : (N, N) array_like
- Unweighted, direct/undirected connection weight/length array
- normalize : bool, optional
- Whether to normalize `adjacency` by largest eigenvalue prior to
- calculation of communicability metric. Default: False
-
- Returns
- -------
- comm : (N, N) numpy.ndarray
- Symmetric array representing communicability of nodes {i, j}
-
- References
- ----------
- Estrada, E., & Hatano, N. (2008). Communicability in complex networks.
- Physical Review E, 77(3), 036111.
-
- Examples
- --------
- >>> from netneurotools import metrics
-
- >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]])
- >>> Q = metrics.communicability_bin(A)
- >>> Q
- array([[4.19452805, 0. , 3.19452805],
- [1.47624622, 2.71828183, 3.19452805],
- [3.19452805, 0. , 4.19452805]])
- """
- if not np.any(np.logical_or(adjacency == 0, adjacency == 1)):
- raise ValueError('Provided adjancecy matrix must be unweighted.')
-
- # normalize by largest eigenvalue to prevent communicability metric from
- # "blowing up"
- if normalize:
- norm = np.linalg.eigvals(adjacency).max()
- adjacency = adjacency / norm
-
- return scipy.sparse.linalg.expm(adjacency)
-
-
-def communicability_wei(adjacency):
- """
- Compute the communicability of pairs of nodes in `adjacency`.
-
- Parameters
- ----------
- adjacency : (N, N) array_like
- Weighted, direct/undirected connection weight/length array
-
- Returns
- -------
- cmc : (N, N) numpy.ndarray
- Symmetric array representing communicability of nodes {i, j}
-
- References
- ----------
- Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure
- applied to complex brain networks. Journal of the Royal Society Interface,
- 6(33), 411-414.
-
- Examples
- --------
- >>> from netneurotools import metrics
-
- >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]])
- >>> Q = metrics.communicability_wei(A)
- >>> Q
- array([[0. , 0. , 1.93581903],
- [0.07810379, 0. , 0.94712177],
- [0.32263651, 0. , 0. ]])
- """
- # negative square root of nodal degrees
- row_sum = adjacency.sum(1)
- neg_sqrt = np.power(row_sum, -0.5)
- square_sqrt = np.diag(neg_sqrt)
-
- # normalize input matrix
- for_expm = square_sqrt @ adjacency @ square_sqrt
-
- # calculate matrix exponential of normalized matrix
- cmc = scipy.sparse.linalg.expm(for_expm)
- cmc[np.diag_indices_from(cmc)] = 0
-
- return cmc
-
-
-def rich_feeder_peripheral(x, sc, stat='median'):
- """
- Calculate connectivity values in rich, feeder, and peripheral edges.
-
- Parameters
- ----------
- x : (N, N) numpy.ndarray
- Symmetric correlation or connectivity matrix
- sc : (N, N) numpy.ndarray
- Binary structural connectivity matrix
- stat : {'mean', 'median'}, optional
- Statistic to use over rich/feeder/peripheral links. Default: 'median'
-
- Returns
- -------
- rfp : (3, k) numpy.ndarray
- Array of median rich (0), feeder (1), and peripheral (2)
- values, defined by `x`. `k` is the maximum degree defined on `sc`.
- pvals : (3, k) numpy.ndarray
- p-value for each link, computed using Welch's t-test.
- Rich links are compared against non-rich links. Feeder links are
- compared against peripheral links. Peripheral links are compared
- against feeder links. T-test is one-sided.
-
- Notes
- -----
- This code was written by Justine Hansen who promises to fix and even
- optimize the code should any issues arise, provided you let her know.
- """
- stats = ['mean', 'median']
- if stat not in stats:
- raise ValueError(f'Provided stat {stat} not valid.\
- Must be one of {stats}')
-
- nnodes = len(sc)
- mask = np.triu(np.ones(nnodes), 1) > 0
- node_degree = degrees_und(sc)
- k = np.max(node_degree).astype(np.int64)
- rfp_label = np.zeros((len(sc[mask]), k))
-
- for degthresh in range(k): # for each degree threshold
- hub_idx = np.where(node_degree >= degthresh) # find the hubs
- hub = np.zeros([nnodes, 1])
- hub[hub_idx, :] = 1
-
- rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp
- for edge1 in range(nnodes):
- for edge2 in range(nnodes):
- if hub[edge1] + hub[edge2] == 2:
- rfp[edge1, edge2] = 1 # rich
- if hub[edge1] + hub[edge2] == 1:
- rfp[edge1, edge2] = 2 # feeder
- if hub[edge1] + hub[edge2] == 0:
- rfp[edge1, edge2] = 3 # peripheral
- rfp_label[:, degthresh] = rfp[mask]
-
- rfp = np.zeros([3, k])
- pvals = np.zeros([3, k])
- for degthresh in range(k):
-
- redfunc = np.median if stat == 'median' else np.mean
- for linktype in range(3):
- rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh]
- == linktype + 1])
-
- # p-value (one-sided Welch's t-test)
- _, pvals[0, degthresh] = ttest_ind(
- x[mask][rfp_label[:, degthresh] == 1],
- x[mask][rfp_label[:, degthresh] != 1],
- equal_var=False, alternative='greater')
- _, pvals[1, degthresh] = ttest_ind(
- x[mask][rfp_label[:, degthresh] == 2],
- x[mask][rfp_label[:, degthresh] == 3],
- equal_var=False, alternative='greater')
- _, pvals[2, degthresh] = ttest_ind(
- x[mask][rfp_label[:, degthresh] == 3],
- x[mask][rfp_label[:, degthresh] == 2],
- equal_var=False, alternative='greater')
-
- return rfp, pvals
-
-
def navigation_wu(nav_dist_mat, sc_mat):
"""
Compute network navigation.
@@ -461,20 +268,170 @@ def get_navigation_path_length(nav_paths, alt_dist_mat):
`pl_dis = get_navigation_path_length(nav_paths, D)`
D is Euclidean distance between node centroids.
- See Also
- --------
- netneurotools.metrics.navigation_wu
- """
- nav_path_len = np.zeros_like(alt_dist_mat)
- for nav_item in nav_paths:
- i, j, _, hop, path = nav_item
- if hop != -1:
- nav_path_len[i, j] = np.sum(
- [alt_dist_mat[path[_], path[_ + 1]] for _ in range(hop)]
- )
- else:
- nav_path_len[i, j] = np.inf
- return nav_path_len
+ See Also
+ --------
+ netneurotools.metrics.navigation_wu
+ """
+ nav_path_len = np.zeros_like(alt_dist_mat)
+ for nav_item in nav_paths:
+ i, j, _, hop, path = nav_item
+ if hop != -1:
+ nav_path_len[i, j] = np.sum(
+ [alt_dist_mat[path[_], path[_ + 1]] for _ in range(hop)]
+ )
+ else:
+ nav_path_len[i, j] = np.inf
+ return nav_path_len
+
+
+def communicability_bin(adjacency, normalize=False):
+ """
+ Compute the communicability of pairs of nodes in `adjacency`.
+
+ Parameters
+ ----------
+ adjacency : (N, N) array_like
+ Unweighted, direct/undirected connection weight/length array
+ normalize : bool, optional
+ Whether to normalize `adjacency` by largest eigenvalue prior to
+ calculation of communicability metric. Default: False
+
+ Returns
+ -------
+ comm : (N, N) numpy.ndarray
+ Symmetric array representing communicability of nodes {i, j}
+
+ References
+ ----------
+ Estrada, E., & Hatano, N. (2008). Communicability in complex networks.
+ Physical Review E, 77(3), 036111.
+
+ Examples
+ --------
+ >>> from netneurotools import metrics
+
+ >>> A = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1]])
+ >>> Q = metrics.communicability_bin(A)
+ >>> Q
+ array([[4.19452805, 0. , 3.19452805],
+ [1.47624622, 2.71828183, 3.19452805],
+ [3.19452805, 0. , 4.19452805]])
+ """
+ if not np.any(np.logical_or(adjacency == 0, adjacency == 1)):
+        raise ValueError('Provided adjacency matrix must be unweighted.')
+
+ # normalize by largest eigenvalue to prevent communicability metric from
+ # "blowing up"
+ if normalize:
+ norm = np.linalg.eigvals(adjacency).max()
+ adjacency = adjacency / norm
+
+ return scipy.sparse.linalg.expm(adjacency)
+
+
+def communicability_wei(adjacency):
+ """
+ Compute the communicability of pairs of nodes in `adjacency`.
+
+ Parameters
+ ----------
+ adjacency : (N, N) array_like
+ Weighted, direct/undirected connection weight/length array
+
+ Returns
+ -------
+ cmc : (N, N) numpy.ndarray
+ Symmetric array representing communicability of nodes {i, j}
+
+ References
+ ----------
+ Crofts, J. J., & Higham, D. J. (2009). A weighted communicability measure
+ applied to complex brain networks. Journal of the Royal Society Interface,
+ 6(33), 411-414.
+
+ Examples
+ --------
+ >>> from netneurotools import metrics
+
+ >>> A = np.array([[2, 0, 3], [0, 2, 1], [0.5, 0, 1]])
+ >>> Q = metrics.communicability_wei(A)
+ >>> Q
+ array([[0. , 0. , 1.93581903],
+ [0.07810379, 0. , 0.94712177],
+ [0.32263651, 0. , 0. ]])
+ """
+ # negative square root of nodal degrees
+ row_sum = adjacency.sum(1)
+ neg_sqrt = np.power(row_sum, -0.5)
+ square_sqrt = np.diag(neg_sqrt)
+
+ # normalize input matrix
+ for_expm = square_sqrt @ adjacency @ square_sqrt
+
+ # calculate matrix exponential of normalized matrix
+ cmc = scipy.sparse.linalg.expm(for_expm)
+ cmc[np.diag_indices_from(cmc)] = 0
+
+ return cmc
+
+
+def path_transitivity(D):
+ """
+ Calculate path transitivity.
+
+ This function implements path transitivity, calculating the density of
+ local detours (triangles) that are available along the shortest paths
+ between all pairs of nodes.
+
+ This function is adapted and optimized from the Brain Connectivity Toolbox.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ D : (N, N) ndarray
+ Weight or connection length matrix. Length matrix is recommended and
+ transform should have been applied.
+
+ Returns
+ -------
+ T_mat : (N, N) ndarray
+ Path transitivity matrix
+
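+    Notes
+    -----
+    A usage sketch (assuming ``W`` is a hypothetical weighted adjacency
+    matrix; the inverse-weight length transform below is one common choice):
+
+    .. code:: python
+
+        length = np.where(W > 0, 1.0 / W, 0.0)  # connection length matrix
+        T_mat = path_transitivity(length)
+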
+ References
+ ----------
+ .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger,
+ A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... &
+ Sporns, O. (2014). Resting-brain functional connectivity predicted
+ by analytic measures of network communication. Proceedings of the
+ National Academy of Sciences, 111(2), 833-838.
+ """
+ n = len(D)
+ m = np.zeros((n, n))
+ T_mat = np.zeros((n, n))
+
+ deg_wu = np.sum(D, axis=0)
+
+ for i in range(n - 1):
+ for j in range(i + 1, n):
+ sig_and = np.logical_and(D[i, :], D[j, :])
+ m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \
+ / (deg_wu[i] + deg_wu[j] - 2 * D[i, j])
+    m += m.transpose()
+
+ _, p_mat = distance_wei_floyd(D)
+
+ for i in range(n - 1):
+ for j in range(i + 1, n):
+ path = retrieve_shortest_path(i, j, p_mat)
+ K = len(path)
+ T_mat[i, j] = 2 \
+ * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \
+ / (K * (K - 1))
+ T_mat += T_mat.transpose()
+
+ return T_mat
def search_information(W, D, has_memory=False):
@@ -580,116 +537,6 @@ def search_information(W, D, has_memory=False):
return SI
-def path_transitivity(D):
- """
- Calculate path transitivity.
-
- This function implements path transitivity, calculating the density of
- local detours (triangles) that are available along the shortest paths
- between all pairs of nodes.
-
- This function is adapted and optimized from the Brain Connectivity Toolbox.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- D : (N, N) ndarray
- Weight or connection length matrix. Length matrix is recommended and
- transform should have been applied.
-
- Returns
- -------
- T_mat : (N, N) ndarray
- Path transitivity matrix
-
- References
- ----------
- .. [1] Goñi, J., Van Den Heuvel, M. P., Avena-Koenigsberger,
- A., Velez de Mendizabal, N., Betzel, R. F., Griffa, A., ... &
- Sporns, O. (2014). Resting-brain functional connectivity predicted
- by analytic measures of network communication. Proceedings of the
- National Academy of Sciences, 111(2), 833-838.
- """
- n = len(D)
- m = np.zeros((n, n))
- T_mat = np.zeros((n, n))
-
- deg_wu = np.sum(D, axis=0)
-
- for i in range(n - 1):
- for j in range(i + 1, n):
- sig_and = np.logical_and(D[i, :], D[j, :])
- m[i, j] = np.dot(D[i, :] + D[j, :], sig_and) \
- / (deg_wu[i] + deg_wu[j] - 2 * D[i, j])
- m += m.transpose()
-
- _, p_mat = distance_wei_floyd(D)
-
- for i in range(n - 1):
- for j in range(i + 1, n):
- path = retrieve_shortest_path(i, j, p_mat)
- K = len(path)
- T_mat[i, j] = 2 \
- * sum([m[i, j] for i, j in itertools.combinations(path, 2)]) \
- / (K * (K - 1))
- T_mat += T_mat.transpose()
-
- return T_mat
-
-
-def flow_graph(W, r=None, t=1):
- """
- Calculate flow graph.
-
- This function implements flow graph, instantiates a continuous
- time random walk on network. Waiting time for walkers at each
- node are distributed as Poisson with rate parameter r.
- This function returns the flow graph at time t.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- W : (N, N) ndarray
- Symmetric adjacency matrix.
- r : (N,) or (N, 1) ndarray, optional
- Rate parameter. Will be set to np.ones((N, 1)) if not specified.
- Default: None
- t : int, optional
- Markov time. Default: 1
-
- Returns
- -------
- dyn : (N, N) ndarray
- flow graph at time T
-
- References
- ----------
- .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S.,
- Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving
- dynamics and structure. Physical Review E, 84(1), 017102.
- .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m
- """
- if r is None:
- r = np.ones((W.shape[0], 1))
- else:
- if r.ndim == 1:
- r = r[:, None]
- deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N)
- deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N)
- ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N)
- laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise
- dyn = np.multiply(
- deg_rate * scipy.sparse.linalg.expm(-t * laplacian),
- ps
- ) # elementwise
- dyn = (dyn + dyn.T) / 2
- return dyn
-
-
def mean_first_passage_time(W, tol=1e-3):
"""
Calculate mean first passage time.
@@ -824,7 +671,7 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5):
morphospace of communication efficiency in complex networks. PLoS One,
8(3), e58070.
"""
- W_bin = _binarize(W_bin)
+ W_bin = _fast_binarize(W_bin)
if not (0 < lambda_prob < 1):
raise ValueError("lambda_prob must be between 0 and 1.")
@@ -871,6 +718,62 @@ def resource_efficiency_bin(W_bin, lambda_prob=0.5):
return E_res, prob_spl
+def flow_graph(W, r=None, t=1):
+ """
+ Calculate flow graph.
+
+    This function implements the flow graph, which instantiates a
+    continuous-time random walk on the network. Waiting times for walkers
+    at each node are Poisson-distributed with rate parameter `r`.
+    This function returns the flow graph at time `t`.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ W : (N, N) ndarray
+ Symmetric adjacency matrix.
+ r : (N,) or (N, 1) ndarray, optional
+ Rate parameter. Will be set to np.ones((N, 1)) if not specified.
+ Default: None
+ t : int, optional
+ Markov time. Default: 1
+
+ Returns
+ -------
+ dyn : (N, N) ndarray
+        Flow graph at time `t`
+
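+    Notes
+    -----
+    A usage sketch (hypothetical toy matrix; rate parameter ``r`` left at its
+    default of all ones):
+
+    .. code:: python
+
+        W = np.array([[0., 1., 1.],
+                      [1., 0., 1.],
+                      [1., 1., 0.]])
+        dyn = flow_graph(W, t=1)  # symmetric flow graph at Markov time 1
+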
+ References
+ ----------
+ .. [1] Lambiotte, R., Sinatra, R., Delvenne, J. C., Evans, T. S.,
+ Barahona, M., & Latora, V. (2011). Flow graphs: Interweaving
+ dynamics and structure. Physical Review E, 84(1), 017102.
+ .. [2] https://github.com/brain-networks/local_scfc/blob/main/fcn/fcn_flow_graph.m
+ """
+ if r is None:
+ r = np.ones((W.shape[0], 1))
+ else:
+ if r.ndim == 1:
+ r = r[:, None]
+ deg_wu = np.sum(W, axis=0, keepdims=True) # (1, N)
+ deg_rate = np.sum(deg_wu / r, axis=0, keepdims=True) # (N, N) => (1, N)
+ ps = deg_wu / (deg_rate * r) # (1, N) / (N, N) => (N, N)
+ laplacian = np.diagflat(r) - np.multiply(np.divide(W, deg_wu), r) # elementwise
+ dyn = np.multiply(
+ deg_rate * scipy.sparse.linalg.expm(-t * laplacian),
+ ps
+ ) # elementwise
+ dyn = (dyn + dyn.T) / 2
+ return dyn
+
+
+def assortativity(W, r=None):
+ """Calculate assortativity."""
+ pass
+
+
def matching_ind_und(W):
"""
Calculate undirected matching index.
@@ -928,37 +831,83 @@ def matching_ind_und(W):
return M0
-def _graph_laplacian(W):
- r"""
- Compute the graph Laplacian of a weighted adjacency matrix.
-
- Graph Laplacian is defined as the degree matrix minus the adjacency
- matrix :math:`L = D - W`, where :math:`D` is the degree matrix and
- is defined as :math:`D_{ii} = \sum_j W_{ij}`.
-
- The graph Laplacian matrix :math:`L` has the form of
-
- .. math::
- L = \begin{bmatrix}
- d_1 & -w_{12} & \cdots & -w_{1n} \\
- -w_{21} & d_2 & \cdots & -w_{2n} \\
- \vdots & \vdots & \ddots & \vdots \\
- -w_{n1} & -w_{n2} & \cdots & d_n
- \end{bmatrix}
+def rich_feeder_peripheral(x, sc, stat='median'):
+ """
+ Calculate connectivity values in rich, feeder, and peripheral edges.
Parameters
----------
- W : (N, N) array_like
- Weighted, directed/undirected connection weight/length array
+ x : (N, N) numpy.ndarray
+ Symmetric correlation or connectivity matrix
+ sc : (N, N) numpy.ndarray
+ Binary structural connectivity matrix
+ stat : {'mean', 'median'}, optional
+ Statistic to use over rich/feeder/peripheral links. Default: 'median'
Returns
-------
- L : (N, N) numpy.ndarray
- Graph Laplacian of `W`
+ rfp : (3, k) numpy.ndarray
+ Array of median rich (0), feeder (1), and peripheral (2)
+ values, defined by `x`. `k` is the maximum degree defined on `sc`.
+ pvals : (3, k) numpy.ndarray
+ p-value for each link, computed using Welch's t-test.
+ Rich links are compared against non-rich links. Feeder links are
+ compared against peripheral links. Peripheral links are compared
+ against feeder links. T-test is one-sided.
+
+ Notes
+ -----
+ This code was written by Justine Hansen who promises to fix and even
+ optimize the code should any issues arise, provided you let her know.
"""
- D = np.diag(np.sum(W, axis=0))
- return D - W
+ stats = ['mean', 'median']
+ if stat not in stats:
+        raise ValueError(f'Provided stat {stat} not valid. '
+                         f'Must be one of {stats}')
+ nnodes = len(sc)
+ mask = np.triu(np.ones(nnodes), 1) > 0
+ node_degree = degrees_und(sc)
+ k = np.max(node_degree).astype(np.int64)
+ rfp_label = np.zeros((len(sc[mask]), k))
-if use_numba:
- _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])")
+ for degthresh in range(k): # for each degree threshold
+ hub_idx = np.where(node_degree >= degthresh) # find the hubs
+ hub = np.zeros([nnodes, 1])
+ hub[hub_idx, :] = 1
+
+ rfp = np.zeros([nnodes, nnodes]) # for each link, define rfp
+ for edge1 in range(nnodes):
+ for edge2 in range(nnodes):
+ if hub[edge1] + hub[edge2] == 2:
+ rfp[edge1, edge2] = 1 # rich
+ if hub[edge1] + hub[edge2] == 1:
+ rfp[edge1, edge2] = 2 # feeder
+ if hub[edge1] + hub[edge2] == 0:
+ rfp[edge1, edge2] = 3 # peripheral
+ rfp_label[:, degthresh] = rfp[mask]
+
+ rfp = np.zeros([3, k])
+ pvals = np.zeros([3, k])
+ for degthresh in range(k):
+
+ redfunc = np.median if stat == 'median' else np.mean
+ for linktype in range(3):
+ rfp[linktype, degthresh] = redfunc(x[mask][rfp_label[:, degthresh]
+ == linktype + 1])
+
+ # p-value (one-sided Welch's t-test)
+ _, pvals[0, degthresh] = ttest_ind(
+ x[mask][rfp_label[:, degthresh] == 1],
+ x[mask][rfp_label[:, degthresh] != 1],
+ equal_var=False, alternative='greater')
+ _, pvals[1, degthresh] = ttest_ind(
+ x[mask][rfp_label[:, degthresh] == 2],
+ x[mask][rfp_label[:, degthresh] == 3],
+ equal_var=False, alternative='greater')
+ _, pvals[2, degthresh] = ttest_ind(
+ x[mask][rfp_label[:, degthresh] == 3],
+ x[mask][rfp_label[:, degthresh] == 2],
+ equal_var=False, alternative='greater')
+
+ return rfp, pvals
diff --git a/netneurotools/metrics/communication.py b/netneurotools/metrics/communication.py
new file mode 100644
index 0000000..6d126b9
--- /dev/null
+++ b/netneurotools/metrics/communication.py
@@ -0,0 +1 @@
+"""Functions for calculating network communication metrics."""
diff --git a/netneurotools/metrics/control.py b/netneurotools/metrics/control.py
new file mode 100644
index 0000000..b1e4b1c
--- /dev/null
+++ b/netneurotools/metrics/control.py
@@ -0,0 +1 @@
+"""Functions for calculating network control metrics."""
diff --git a/netneurotools/metrics/metrics_utils.py b/netneurotools/metrics/metrics_utils.py
new file mode 100644
index 0000000..b32f632
--- /dev/null
+++ b/netneurotools/metrics/metrics_utils.py
@@ -0,0 +1,66 @@
+"""Functions for supporting network metrics."""
+
+import numpy as np
+
+try:
+ from numba import njit
+ use_numba = True
+except ImportError:
+ use_numba = False
+
+
+def _fast_binarize(W):
+ """
+ Binarize a matrix.
+
+ Parameters
+ ----------
+ W : (N, N) array_like
+ Matrix to be binarized
+
+ Returns
+ -------
+ binarized : (N, N) numpy.ndarray
+ Binarized matrix
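+
+    Notes
+    -----
+    Illustrative example (toy matrix):
+
+    .. code:: python
+
+        W = np.array([[0.0, 0.5], [2.0, 0.0]])
+        _fast_binarize(W)  # array([[0, 1], [1, 0]])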
+ """
+ return (W > 0) * 1
+
+
+if use_numba:
+ _fast_binarize = njit(_fast_binarize)
+
+
+def _graph_laplacian(W):
+ r"""
+ Compute the graph Laplacian of a weighted adjacency matrix.
+
+ Graph Laplacian is defined as the degree matrix minus the adjacency
+ matrix :math:`L = D - W`, where :math:`D` is the degree matrix and
+ is defined as :math:`D_{ii} = \sum_j W_{ij}`.
+
+ The graph Laplacian matrix :math:`L` has the form of
+
+ .. math::
+ L = \begin{bmatrix}
+ d_1 & -w_{12} & \cdots & -w_{1n} \\
+ -w_{21} & d_2 & \cdots & -w_{2n} \\
+ \vdots & \vdots & \ddots & \vdots \\
+ -w_{n1} & -w_{n2} & \cdots & d_n
+ \end{bmatrix}
+
+ Parameters
+ ----------
+ W : (N, N) array_like
+ Weighted, directed/undirected connection weight/length array
+
+ Returns
+ -------
+ L : (N, N) numpy.ndarray
+ Graph Laplacian of `W`
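+
+    Notes
+    -----
+    Illustrative example (toy 3-node chain):
+
+    .. code:: python
+
+        W = np.array([[0., 1., 0.],
+                      [1., 0., 2.],
+                      [0., 2., 0.]])
+        L = _graph_laplacian(W)  # equals np.diag([1., 3., 2.]) - W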
+ """
+ D = np.diag(np.sum(W, axis=0))
+ return D - W
+
+
+if use_numba:
+ _graph_laplacian = njit(_graph_laplacian) # ("float64[:,::1](float64[:,::1])")
diff --git a/netneurotools/metrics/spreading.py b/netneurotools/metrics/spreading.py
new file mode 100644
index 0000000..8a5fb79
--- /dev/null
+++ b/netneurotools/metrics/spreading.py
@@ -0,0 +1,6 @@
+"""Functions for calculating network spreading models."""
+
+
+def simulate_atrophy():
+ """Simulate atrophy in a network."""
+ pass
diff --git a/netneurotools/metrics/statistical.py b/netneurotools/metrics/statistical.py
new file mode 100644
index 0000000..db1bed8
--- /dev/null
+++ b/netneurotools/metrics/statistical.py
@@ -0,0 +1,661 @@
+"""Functions for calculating statistical network metrics."""
+
+import numpy as np
+
+try:
+ from numba import njit
+ use_numba = True
+except ImportError:
+ use_numba = False
+
+from .metrics_utils import _graph_laplacian
+
+
+def network_pearsonr(annot1, annot2, weight):
+ r"""
+    Calculate Pearson correlation between two annotation vectors.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ annot1 : (N,) array_like
+ First annotation vector, demean will be applied.
+ annot2 : (N,) array_like
+ Second annotation vector, demean will be applied.
+ weight : (N, N) array_like
+ Weight matrix. Diagonal elements should be 1.
+
+ Returns
+ -------
+ corr : float
+ Network correlation between `annot1` and `annot2`
+
+ Notes
+ -----
+ If Pearson correlation is represented as
+
+ .. math::
+ \rho_{x,y} = \dfrac{
+ \mathrm{sum}(I \times (\hat{x} \otimes \hat{y}))
+ }{
+ \sigma_x \sigma_y
+ }
+
+ The network correlation is defined analogously as
+
+ .. math::
+ \rho_{x,y,G} = \dfrac{
+ \mathrm{sum}(W \times (\hat{x} \otimes \hat{y}))
+ }{
+ \sigma_{x,W} \sigma_{y,W}
+ }
+
+    where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors.
+
+    The weight matrix :math:`W` is used to represent the network structure.
+    It is usually of the form :math:`W = \exp(-kL)`, where :math:`L` is the
+    length matrix and :math:`k` is a decay parameter.
+
+ Example using shortest path length as weight
+
+ .. code:: python
+
+ spl, _ = distance_wei_floyd(D) # input should be distance matrix
+ spl_wei = 1 / np.exp(spl)
+ netcorr = network_pearsonr(annot1, annot2, spl_wei)
+
+ Example using (inverse) effective resistance as weight
+
+ .. code:: python
+
+ R_eff = effective_resistance(W)
+ R_eff_norm = R_eff / np.max(R_eff)
+ W = 1 / R_eff_norm
+ W = W / np.max(W)
+ np.fill_diagonal(W, 1.0)
+ netcorr = network_pearsonr(annot1, annot2, W)
+
+ References
+ ----------
+ .. [1] Coscia, M. (2021). Pearson correlations on complex networks.
+ Journal of Complex Networks, 9(6), cnab036.
+ https://doi.org/10.1093/comnet/cnab036
+
+
+ See Also
+ --------
+    netneurotools.metrics.network_pearsonr_pairwise
+ """
+ annot1 = annot1 - np.mean(annot1)
+ annot2 = annot2 - np.mean(annot2)
+ upper = np.sum(np.multiply(weight, np.outer(annot1, annot2)))
+ lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1)))
+ lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2)))
+ return upper / np.sqrt(lower1) / np.sqrt(lower2)
+
+
+def network_pearsonr_numba(annot1, annot2, weight):
+ """
+    Numba version of :meth:`netneurotools.metrics.network_pearsonr`.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ annot1 : (N,) array_like
+ First annotation vector, demean will be applied.
+ annot2 : (N,) array_like
+ Second annotation vector, demean will be applied.
+ weight : (N, N) array_like
+ Weight matrix. Diagonal elements should be 1.
+
+ Returns
+ -------
+ corr : float
+ Network correlation between `annot1` and `annot2`
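+
+    Notes
+    -----
+    A usage sketch (hypothetical toy inputs; with an identity weight matrix
+    the result reduces to the ordinary Pearson correlation):
+
+    .. code:: python
+
+        rng = np.random.default_rng(1234)
+        annot1, annot2 = rng.random(10), rng.random(10)
+        corr = network_pearsonr_numba(annot1, annot2, np.eye(10))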
+ """
+ n = annot1.shape[0]
+ annot1 = annot1 - np.mean(annot1)
+ annot2 = annot2 - np.mean(annot2)
+ upper, lower1, lower2 = 0.0, 0.0, 0.0
+ for i in range(n):
+ for j in range(n):
+ upper += annot1[i] * annot2[j] * weight[i, j]
+ lower1 += annot1[i] * annot1[j] * weight[i, j]
+ lower2 += annot2[i] * annot2[j] * weight[i, j]
+ return upper / np.sqrt(lower1) / np.sqrt(lower2)
+
+
+if use_numba:
+ network_pearsonr_numba = njit(network_pearsonr_numba)
+
+
+def _cross_outer(annot_mat):
+ """
+ Calculate cross outer product of input matrix.
+
+    This function is only used in `network_pearsonr_pairwise`.
+
+ Parameters
+ ----------
+ annot_mat : (N, D) array_like
+ Input matrix
+
+ Returns
+ -------
+ cross_outer : (N, N, D, D) numpy.ndarray
+ Cross outer product of `annot_mat`
+ """
+ n_samp, n_feat = annot_mat.shape
+ cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype)
+ for a in range(n_samp):
+ for b in range(n_samp):
+ for c in range(n_feat):
+ for d in range(n_feat):
+ cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d]
+ return cross_outer
+
+
+if use_numba:
+ # ("float64[:,:,:,::1](float64[:,::1])")
+ _cross_outer = njit(_cross_outer)
+
+
+def _multiply_sum(cross_outer, weight):
+ """
+ Multiply and sum cross outer product.
+
+    This function is only used in `network_pearsonr_pairwise`.
+
+ Parameters
+ ----------
+ cross_outer : (N, N, D, D) array_like
+ Cross outer product of `annot_mat`
+ weight : (D, D) array_like
+ Weight matrix
+
+ Returns
+ -------
+ cross_outer_after : (N, N) numpy.ndarray
+ Result of multiplying and summing `cross_outer`
+ """
+ n_samp, _, n_dim, _ = cross_outer.shape
+ cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype)
+ for i in range(n_samp):
+ for j in range(n_samp):
+ curr_sum = 0.0
+ for k in range(n_dim):
+ for l in range(n_dim): # noqa: E741
+ curr_sum += weight[k, l] * cross_outer[i, j, k, l]
+ cross_outer_after[i, j] = curr_sum
+ return cross_outer_after
+
+
+if use_numba:
+ # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])")
+ _multiply_sum = njit(_multiply_sum)
+
+
+def network_pearsonr_pairwise(annot_mat, weight):
+ """
+ Calculate pairwise network correlation between rows of `annot_mat`.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ annot_mat : (N, D) array_like
+ Input matrix
+ weight : (D, D) array_like
+ Weight matrix. Diagonal elements should be 1.
+
+ Returns
+ -------
+ corr_mat : (N, N) numpy.ndarray
+ Pairwise network correlation matrix
+
+ Notes
+ -----
+    This is a faster version of :meth:`netneurotools.metrics.network_pearsonr`
+    for calculating pairwise network correlation between rows of `annot_mat`.
+    Check :meth:`netneurotools.metrics.network_pearsonr` for details.
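+
+    A minimal usage sketch (assuming `annot_mat` stores one annotation per row
+    and the weight matrix is built as in the shortest-path-length example of
+    :meth:`netneurotools.metrics.network_pearsonr`):
+
+    .. code:: python
+
+        spl, _ = distance_wei_floyd(D)  # D is a distance matrix
+        weight = 1 / np.exp(spl)
+        corr_mat = network_pearsonr_pairwise(annot_mat, weight)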
+
+ See Also
+ --------
+    netneurotools.metrics.network_pearsonr
+ """
+ annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True)
+ if use_numba:
+ cross_outer = _cross_outer(annot_mat_demean)
+ cross_outer_after = _multiply_sum(cross_outer, weight)
+ else:
+ # https://stackoverflow.com/questions/24839481/python-matrix-outer-product
+ cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean)
+ cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3))
+ # translating the two lines below in numba does not speed up much
+ lower = np.sqrt(np.diagonal(cross_outer_after))
+ return cross_outer_after / np.einsum('i,j', lower, lower)
+
+
+def _onehot_quadratic_form_broadcast(Q_star):
+ """
+ Calculate one-hot quadratic form of input matrix.
+
+    This function is only used in `effective_resistance`.
+
+ Parameters
+ ----------
+ Q_star : (N, N) array_like
+ Input matrix
+
+ Returns
+ -------
+ R_eff : (N, N) numpy.ndarray
+ One-hot quadratic form of `Q_star`
+ """
+ n = Q_star.shape[0]
+ R_eff = np.empty((n, n), Q_star.dtype)
+ for i in range(n):
+ for j in range(n):
+ R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j]
+ return R_eff
+
+
+if use_numba:
+ # ("float64[:,::1](float64[:,::1])")
+ _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast)
+
+
+def effective_resistance(W, directed=True):
+ """
+ Calculate effective resistance matrix.
+
+    The effective resistance between two nodes in a graph, often used in the context
+    of electrical networks, is a measure that stems from the (pseudo)inverse of the
+    Laplacian matrix of the graph.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ W : (N, N) array_like
+ Weight matrix.
+ directed : bool, optional
+        Whether the graph is directed. This determines whether to enable the
+        :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. If the weight
+        matrix is symmetric (and real-valued, hence Hermitian), you can set this
+        to False for better performance. Default: True
+
+ Returns
+ -------
+ R_eff : (N, N) numpy.ndarray
+ Effective resistance matrix
+
+ Notes
+ -----
+ The effective resistance between two nodes :math:`i` and :math:`j` is defined as
+
+ .. math::
+ R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j)
+
+ where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix
+ :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector.
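+
+    A minimal usage sketch (assuming `W` is a symmetric, non-negative weight
+    matrix):
+
+    .. code:: python
+
+        R_eff = effective_resistance(W, directed=False)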
+
+ References
+ ----------
+ .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij,
+ R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications,
+ 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024
+
+ See Also
+ --------
+    netneurotools.metrics.network_polarisation
+ """
+ L = _graph_laplacian(W)
+ Q_star = np.linalg.pinv(L, hermitian=not directed)
+ if use_numba:
+ R_eff = _onehot_quadratic_form_broadcast(Q_star)
+ else:
+ Q_star_diag = np.diag(Q_star)
+ R_eff = \
+ Q_star_diag[:, np.newaxis] \
+ - Q_star \
+ - Q_star.T \
+ + Q_star_diag[np.newaxis, :]
+ return R_eff
+
+
+def _polariz_diff(vec):
+ """
+ Calculate difference between positive and negative parts of a vector.
+
+    This function is only used in `network_polarisation`.
+
+ Parameters
+ ----------
+ vec : (N,) array_like
+ Input vector. Must have both positive and negative values.
+
+ Returns
+ -------
+ vec_diff : (N,) numpy.ndarray
+ Difference between positive and negative parts of `vec`
+ """
+    # positive part, rescaled to a maximum of 1
+ vec_pos = np.maximum(vec, 0.0)
+ vec_pos /= np.max(vec_pos)
+    # absolute value of the negative part, rescaled to a maximum of 1
+ vec_neg = np.minimum(vec, 0.0)
+ vec_neg = np.abs(vec_neg)
+ vec_neg /= np.max(vec_neg)
+ return (vec_pos - vec_neg)
+
+
+if use_numba:
+ _polariz_diff = njit(_polariz_diff)
+
+
+def _quadratic_form(W, vec_left, vec_right, squared=False):
+ """
+ Calculate quadratic form :math:`v_{left}^T W v_{right}`.
+
+ Parameters
+ ----------
+ W : (N, N) array_like
+ Input matrix.
+ vec_left : (N,) array_like
+ Left weight vector.
+ vec_right : (N,) array_like
+ Right weight vector.
+ squared : bool, optional
+ Whether to square the input weight matrix. Default: False
+
+ Returns
+ -------
+ quadratic_form : float
+ Quadratic form from `W`, `vec_left`, and `vec_right`
+ """
+    # numpy equivalent: vec_left.T @ W @ vec_right
+    # the explicit loops below are written so that numba can compile this helper
+ n = W.shape[0]
+ ret = 0.0
+ for i in range(n):
+ for j in range(n):
+ if squared:
+ ret += vec_left[i] * vec_right[j] * W[i, j]**2
+ else:
+ ret += vec_left[i] * vec_right[j] * W[i, j]
+ return ret
+
+
+if use_numba:
+ _quadratic_form = njit(_quadratic_form)
+
+
+def network_polarisation(vec, W, directed=True):
+ r"""
+ Calculate polarisation of a vector on a graph.
+
+    Network polarisation is a measure of polarisation that takes into account
+    all three of the factors below [1]_:
+
+ - how extreme the opinions of the people are
+ - how much they organize into echo chambers, and
+ - how these echo chambers organize in the network
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ vec : (N,) array_like
+ Polarization vector. Must have both positive and negative values. Will be
+ normalized between -1 and 1 internally.
+ W : (N, N) array_like
+ Weight matrix.
+ directed : bool, optional
+        Whether the graph is directed. This determines whether to enable the
+        :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. If the weight
+        matrix is symmetric (and real-valued, hence Hermitian), you can set this
+        to False for better performance. Default: True
+
+ Returns
+ -------
+ polariz : float
+ Polarization of `vec` on `W`
+
+ Notes
+ -----
+    The measure is based on the generalized Euclidean distance, defined as
+
+ .. math::
+ \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)}
+
+ where :math:`o^+` and :math:`o^-` are the positive and negative parts of the
+ polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse
+ of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance`
+ for similarity.
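+
+    A minimal usage sketch (assuming `vec` holds signed opinion scores for each
+    node and `W` is a symmetric weight matrix):
+
+    .. code:: python
+
+        polariz = network_polarisation(vec, W, directed=False)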
+
+ References
+ ----------
+ .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological
+ polarization on a network using generalized Euclidean distance. Science Advances,
+ 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044
+
+ See Also
+ --------
+    netneurotools.metrics.effective_resistance
+ """
+ L = _graph_laplacian(W)
+ Q_star = np.linalg.pinv(L, hermitian=not directed)
+ diff = _polariz_diff(vec)
+ if use_numba:
+ polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False)
+ else:
+ polariz_sq = (diff.T @ Q_star @ diff)
+ return np.sqrt(polariz_sq)
+
+
+def network_variance(vec, D):
+ r"""
+ Calculate variance of a vector on a graph.
+
+    Network variance is a measure of variance that takes the network
+    structure into account.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ vec : (N,) array_like
+ Input vector. Must be all positive.
+ Will be normalized internally as a probability distribution.
+ D : (N, N) array_like
+ Distance matrix.
+
+ Returns
+ -------
+ network_variance : float
+ Network variance of `vec` on `D`
+
+ Notes
+ -----
+ The network variance is defined as
+
+ .. math::
+ var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j)
+
+ where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)`
+ is the distance between node :math:`i` and :math:`j`.
+
+ The distance matrix :math:`D` can make use of effective resistance or its
+ square root.
+
+ Example using effective resistance as weight matrix
+
+ .. code:: python
+
+ R_eff = effective_resistance(W)
+ netvar = network_variance(vec, R_eff)
+
+ References
+ ----------
+ .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022).
+ Variance and covariance of distributions on graphs. SIAM Review, 64(2),
+ 343–359. https://doi.org/10.1137/20M1361328
+
+ See Also
+ --------
+    netneurotools.metrics.network_covariance
+ """
+ p = vec / np.sum(vec)
+ return 0.5 * (p.T @ np.multiply(D, D) @ p)
+
+
+def network_variance_numba(vec, D):
+ """
+    Numba version of :meth:`netneurotools.metrics.network_variance`.
+
+    Network variance is a measure of variance that takes the network
+    structure into account.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ vec : (N,) array_like
+ Input vector. Must be all positive.
+ Will be normalized internally as a probability distribution.
+ D : (N, N) array_like
+ Distance matrix.
+
+ Returns
+ -------
+ network_variance : float
+ Network variance of `vec` on `D`
+ """
+ p = vec / np.sum(vec)
+ return 0.5 * _quadratic_form(D, p, p, squared=True)
+
+
+if use_numba:
+ network_variance_numba = njit(network_variance_numba)
+
+
+def network_covariance(joint_pmat, D, calc_marginal=True):
+ r"""
+ Calculate covariance of a joint probability matrix on a graph.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ joint_pmat : (N, N) array_like
+        Joint probability matrix. Please make sure it is valid, i.e.,
+        non-negative and summing to 1.
+ D : (N, N) array_like
+ Distance matrix.
+ calc_marginal : bool, optional
+ Whether to calculate marginal variance. It will be marginally faster if
+ :code:`calc_marginal=False` (returning marginal variances as 0). Default: True
+
+ Returns
+ -------
+ network_covariance : float
+ Covariance of `joint_pmat` on `D`
+    var_p : float
+        Marginal variance of the row-sum distribution of `joint_pmat` on `D`.
+        Will be 0 if :code:`calc_marginal=False`
+    var_q : float
+        Marginal variance of the column-sum distribution of `joint_pmat` on `D`.
+        Will be 0 if :code:`calc_marginal=False`
+
+ Notes
+ -----
+    The network covariance is defined as
+
+ .. math::
+ cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j)
+
+ where :math:`P` is the joint probability matrix, :math:`p` and :math:`q`
+ are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)`
+ is the distance between node :math:`i` and :math:`j`.
+
+ Check :func:`network_variance` for usage.
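+
+    A minimal usage sketch (assuming `joint_pmat` is a valid joint probability
+    matrix defined over the nodes of `W`):
+
+    .. code:: python
+
+        R_eff = effective_resistance(W)
+        cov, var_p, var_q = network_covariance(joint_pmat, R_eff)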
+
+ References
+ ----------
+ .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022).
+ Variance and covariance of distributions on graphs. SIAM Review, 64(2),
+ 343–359. https://doi.org/10.1137/20M1361328
+
+ See Also
+ --------
+    netneurotools.metrics.network_variance
+ """
+ p = np.sum(joint_pmat, axis=1)
+ q = np.sum(joint_pmat, axis=0)
+ D_sq = np.multiply(D, D)
+ cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq))
+ if calc_marginal:
+ var_p = p.T @ D_sq @ p
+ var_q = q.T @ D_sq @ q
+ else:
+ var_p, var_q = 0, 0
+ return 0.5 * cov, 0.5 * var_p, 0.5 * var_q
+
+
+def network_covariance_numba(joint_pmat, D, calc_marginal=True):
+ """
+    Numba version of :meth:`netneurotools.metrics.network_covariance`.
+
+ .. warning::
+ Test before use.
+
+ Parameters
+ ----------
+ joint_pmat : (N, N) array_like
+        Joint probability matrix. Please make sure it is valid, i.e.,
+        non-negative and summing to 1.
+ D : (N, N) array_like
+ Distance matrix.
+ calc_marginal : bool, optional
+ Whether to calculate marginal variance. It will be marginally faster if
+ :code:`calc_marginal=False` (returning marginal variances as 0). Default: True
+
+ Returns
+ -------
+ network_covariance : float
+ Covariance of `joint_pmat` on `D`
+    var_p : float
+        Marginal variance of the row-sum distribution of `joint_pmat` on `D`.
+        Will be 0 if :code:`calc_marginal=False`
+    var_q : float
+        Marginal variance of the column-sum distribution of `joint_pmat` on `D`.
+        Will be 0 if :code:`calc_marginal=False`
+ """
+ n = joint_pmat.shape[0]
+ p = np.sum(joint_pmat, axis=1)
+ q = np.sum(joint_pmat, axis=0)
+ cov = 0.0
+ var_p, var_q = 0.0, 0.0
+ for i in range(n):
+ for j in range(n):
+ cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2
+ if calc_marginal:
+ var_p += p[i] * p[j] * D[i, j]**2
+ var_q += q[i] * q[j] * D[i, j]**2
+ return 0.5 * cov, 0.5 * var_p, 0.5 * var_q
+
+
+if use_numba:
+ network_covariance_numba = njit(network_covariance_numba)
diff --git a/netneurotools/metrics/tests/__init__.py b/netneurotools/metrics/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/tests/test_metrics.py b/netneurotools/metrics/tests/test_bct.py
similarity index 72%
rename from netneurotools/tests/test_metrics.py
rename to netneurotools/metrics/tests/test_bct.py
index 253da0f..f83ab6a 100644
--- a/netneurotools/tests/test_metrics.py
+++ b/netneurotools/metrics/tests/test_bct.py
@@ -1,15 +1,15 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.metrics functionality."""
+"""For testing netneurotools.metrics.bct functionality."""
-import numpy as np
import pytest
+import numpy as np
from netneurotools import metrics
rs = np.random.RandomState(1234)
-def test_communicability():
+def test_communicability_bin():
+ """Test communicability_bin function."""
comm = metrics.communicability_bin(rs.choice([0, 1], size=(100, 100)))
assert comm.shape == (100, 100)
@@ -18,6 +18,7 @@ def test_communicability():
def test_communicability_wei():
+ """Test communicability_wei function."""
comm = metrics.communicability_wei(rs.rand(100, 100))
assert comm.shape == (100, 100)
assert np.allclose(np.diag(comm), 0)
diff --git a/netneurotools/metrics/tests/test_communication.py b/netneurotools/metrics/tests/test_communication.py
new file mode 100644
index 0000000..dd066f8
--- /dev/null
+++ b/netneurotools/metrics/tests/test_communication.py
@@ -0,0 +1 @@
+"""For testing netneurotools.metrics.communication functionality."""
diff --git a/netneurotools/metrics/tests/test_control.py b/netneurotools/metrics/tests/test_control.py
new file mode 100644
index 0000000..28ad7c2
--- /dev/null
+++ b/netneurotools/metrics/tests/test_control.py
@@ -0,0 +1 @@
+"""For testing netneurotools.metrics.control functionality."""
diff --git a/netneurotools/metrics/tests/test_spreading.py b/netneurotools/metrics/tests/test_spreading.py
new file mode 100644
index 0000000..216c638
--- /dev/null
+++ b/netneurotools/metrics/tests/test_spreading.py
@@ -0,0 +1 @@
+"""For testing netneurotools.metrics.spreading functionality."""
diff --git a/netneurotools/metrics/tests/test_statistical.py b/netneurotools/metrics/tests/test_statistical.py
new file mode 100644
index 0000000..73caf2d
--- /dev/null
+++ b/netneurotools/metrics/tests/test_statistical.py
@@ -0,0 +1 @@
+"""For testing netneurotools.metrics.statistical functionality."""
diff --git a/netneurotools/modularity.py b/netneurotools/modularity.py
deleted file mode 100644
index 1831dd9..0000000
--- a/netneurotools/modularity.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Functions for working with network modularity."""
-
-import bct
-import numpy as np
-from sklearn.utils.validation import check_random_state
-from . import cluster
-
-try:
- from numba import njit, prange
- use_numba = True
-except ImportError:
- prange = range
- use_numba = False
-
-
-def consensus_modularity(adjacency, gamma=1, B='modularity',
- repeats=250, null_func=np.mean, seed=None):
- """
- Find community assignments from `adjacency` through consensus.
-
- Performs `repeats` iterations of community detection on `adjacency` and
- then uses consensus clustering on the resulting community assignments.
-
- Parameters
- ----------
- adjacency : (N, N) array_like
- Adjacency matrix (weighted/non-weighted) on which to perform consensus
- community detection.
- gamma : float, optional
- Resolution parameter for modularity maximization. Default: 1
- B : str or (N, N) array_like, optional
- Null model to use for consensus clustering. If `str`, must be one of
- ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default:
- 'modularity'
- repeats : int, optional
- Number of times to repeat Louvain algorithm clustering. Default: 250
- null_func : callable, optional
- Function used to generate null model when performing consensus-based
- clustering. Must accept a 2D array as input and return a single value.
- Default: `np.mean`
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Default: None
-
- Returns
- -------
- consensus : (N,) np.ndarray
- Consensus-derived community assignments
- Q_all : array_like
- Optimized modularity over all `repeats` community assignments
- zrand_all : array_like
- z-Rand score over all pairs of `repeats` community assignment vectors
-
- References
- ----------
- Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson,
- J. M., & Mucha, P. J. (2013). Robust detection of dynamic community
- structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear
- Science, 23(1), 013142.
- """
- # generate community partitions `repeat` times
- comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B)
- for i in range(repeats)])
- comms = np.column_stack(comms)
-
- # find consensus cluster assignments across all partitoning solutions
- consensus = cluster.find_consensus(comms, null_func=null_func, seed=seed)
-
- # get z-rand statistics for partition similarity (n.b. can take a while)
- zrand_all = _zrand_partitions(comms)
-
- return consensus, np.array(Q_all), zrand_all
-
-
-def _dummyvar(labels):
- """
- Generate dummy-coded array from provided community assignment `labels`.
-
- Parameters
- ----------
- labels : (N,) array_like
- Labels assigning `N` samples to `G` groups
-
- Returns
- -------
- ci : (N, G) numpy.ndarray
- Dummy-coded array where 1 indicates that a sample belongs to a group
- """
- comms = np.unique(labels)
-
- ci = np.zeros((len(labels), len(comms)))
- for n, grp in enumerate(comms):
- ci[:, n] = labels == grp
-
- return ci
-
-
-def zrand(X, Y):
- """
- Calculate the z-Rand index of two community assignments.
-
- Parameters
- ----------
- X, Y : (n, 1) array_like
- Community assignment vectors to compare
-
- Returns
- -------
- z_rand : float
- Z-rand index
-
- References
- ----------
- Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter.
- (2011). Comparing Community Structure to Characteristics in Online
- Collegiate Social Networks. SIAM Review, 53, 526-543.
- """
- if X.ndim > 1 or Y.ndim > 1:
- if X.shape[-1] > 1 or Y.shape[-1] > 1:
- raise ValueError('X and Y must have only one-dimension each. '
- 'Please check inputs.')
-
- Xf = X.flatten()
- Yf = Y.flatten()
-
- n = len(Xf)
- indx, indy = _dummyvar(Xf), _dummyvar(Yf)
- Xa = indx.dot(indx.T)
- Ya = indy.dot(indy.T)
-
- M = n * (n - 1) / 2
- M1 = Xa.nonzero()[0].size / 2
- M2 = Ya.nonzero()[0].size / 2
-
- wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2
-
- mod = n * (n**2 - 3 * n - 2)
- C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum())
- C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum())
-
- a = M / 16
- b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2))
- c = C1 * C2 / (16 * n * (n - 1) * (n - 2))
- d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M))
- * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M))
- / (64 * n * (n - 1) * (n - 2) * (n - 3)))
-
- sigw2 = a - b + c + d
- # catch any negatives
- if sigw2 < 0:
- return 0
- z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2)
-
- return z_rand
-
-
-def _zrand_partitions(communities):
- """
- Calculate z-Rand for all pairs of assignments in `communities`.
-
- Iterates through every pair of community assignment vectors in
- `communities` and calculates the z-Rand score to assess their similarity.
-
- Parameters
- ----------
- communities : (S, R) array_like
- Community assignments for `S` samples over `R` partitions
-
- Returns
- -------
- all_zrand : array_like
- z-Rand score over all pairs of `R` partitions of community assignments
- """
- n_partitions = communities.shape[-1]
- all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2))
-
- for c1 in prange(n_partitions):
- for c2 in prange(c1 + 1, n_partitions):
- idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2))
- all_zrand[idx] = zrand(communities[:, c1], communities[:, c2])
-
- return all_zrand
-
-
-if use_numba:
- _dummyvar = njit(_dummyvar)
- zrand = njit(zrand)
- _zrand_partitions = njit(_zrand_partitions, parallel=True)
-
-
-def get_modularity(adjacency, comm, gamma=1):
- """
- Calculate modularity contribution for each community in `comm`.
-
- Parameters
- ----------
- adjacency : (N, N) array_like
- Adjacency (e.g., correlation) matrix
- comm : (N,) array_like
- Community assignment vector splitting `N` subjects into `G` groups
- gamma : float, optional
- Resolution parameter used in original modularity maximization.
- Default: 1
-
- Returns
- -------
- comm_q : (G,) ndarray
- Relative modularity for each community
-
- See Also
- --------
- netneurotools.modularity.get_modularity_z
- netneurotools.modularity.get_modularity_sig
- """
- adjacency, comm = np.asarray(adjacency), np.asarray(comm)
- s = adjacency.sum()
- B = adjacency - (gamma * np.outer(adjacency.sum(axis=1),
- adjacency.sum(axis=0)) / s)
-
- # find modularity contribution of each community
- communities = np.unique(comm)
- comm_q = np.empty(shape=communities.size)
- for n, ci in enumerate(communities):
- inds = comm == ci
- comm_q[n] = B[np.ix_(inds, inds)].sum() / s
-
- return comm_q
-
-
-def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None):
- """
- Calculate average z-score of community assignments by permutation.
-
- Parameters
- ----------
- adjacency : (N, N) array_like
- Adjacency (correlation) matrix
- comm : (N,) array_like
- Community assignment vector splitting `N` subjects into `G` groups
- gamma : float, optional
- Resolution parameter used in original modularity maximization.
- Default: 1
- n_perm : int, optional
- Number of permutations. Default: 10000
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Default: None
-
- Returns
- -------
- q_z : float
- Average Z-score of modularity of communities
-
- See Also
- --------
- netneurotools.modularity.get_modularity
- netneurotools.modularity.get_modularity_sig
- """
- rs = check_random_state(seed)
-
- real_qs = get_modularity(adjacency, comm, gamma)
- simu_qs = np.empty(shape=(np.unique(comm).size, n_perm))
- for perm in range(n_perm):
- simu_qs[:, perm] = get_modularity(adjacency,
- rs.permutation(comm),
- gamma)
-
- # avoid instances where dist.std(1) == 0
- std = simu_qs.std(axis=1)
- if std == 0:
- return np.mean(real_qs - simu_qs.mean(axis=1))
- else:
- return np.mean((real_qs - simu_qs.mean(axis=1)) / std)
-
-
-def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01,
- seed=None):
- """
- Calculate significance of community assignments in `comm` by permutation.
-
- Parameters
- ----------
- adjacency : (N, N) array_like
- Adjacency (correlation) matrix
- comm : (N,) array_like
- Community assignment vector
- gamma : float
- Resolution parameter used in original modularity maximization
- n_perm : int, optional
- Number of permutations to test against. Default: 10000
- alpha : (0,1) float, optional
- Alpha level to assess significance. Default: 0.01
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Default: None
-
- Returns
- -------
- ndarray
- Significance of each community in `comm` (boolean)
-
- See Also
- --------
- netneurotools.modularity.get_modularity_z
- netneurotools.modularity.get_modularity_sig
- """
- rs = check_random_state(seed)
-
- real_qs = get_modularity(adjacency, comm, gamma)
- simu_qs = np.empty(shape=(np.unique(comm).size, n_perm))
- for perm in range(n_perm):
- simu_qs[:, perm] = get_modularity(adjacency,
- rs.permutation(comm),
- gamma)
-
- q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1)
-
- return q_sig
diff --git a/netneurotools/modularity/__init__.py b/netneurotools/modularity/__init__.py
new file mode 100644
index 0000000..2fe84ad
--- /dev/null
+++ b/netneurotools/modularity/__init__.py
@@ -0,0 +1,25 @@
+"""Functions for working with network modularity."""
+
+
+from .modules import (
+ match_cluster_labels,
+ match_assignments,
+ reorder_assignments,
+ find_consensus,
+ consensus_modularity,
+ _dummyvar,
+ zrand,
+ _zrand_partitions,
+ get_modularity,
+ get_modularity_z,
+ get_modularity_sig,
+)
+
+
+__all__ = [
+ # modules
+ 'match_cluster_labels', 'match_assignments', 'reorder_assignments',
+ 'find_consensus', 'consensus_modularity', '_dummyvar', 'zrand',
+ '_zrand_partitions', 'get_modularity', 'get_modularity_z',
+ 'get_modularity_sig',
+]
diff --git a/netneurotools/cluster.py b/netneurotools/modularity/modules.py
similarity index 56%
rename from netneurotools/cluster.py
rename to netneurotools/modularity/modules.py
index 4b46a9d..120e984 100644
--- a/netneurotools/cluster.py
+++ b/netneurotools/modularity/modules.py
@@ -1,11 +1,17 @@
-# -*- coding: utf-8 -*-
-"""Functions for clustering and working with cluster solutions."""
+"""Functions for working with network modules."""
import bct
import numpy as np
+from sklearn.utils.validation import check_random_state
from scipy import optimize
from scipy.cluster import hierarchy
-from sklearn.utils.validation import check_random_state
+
+try:
+ from numba import njit, prange
+ use_numba = True
+except ImportError:
+ prange = range
+ use_numba = False
def _get_relabels(c1, c2):
@@ -64,14 +70,14 @@ def match_cluster_labels(source, target):
Examples
--------
- >>> from netneurotools import cluster
+ >>> from netneurotools import modularity
When cluster labels are perfectly matched but e.g., inverted the function
will find a perfect mapping:
>>> a = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
>>> b = np.array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
- >>> cluster.match_cluster_labels(a, b)
+ >>> modularity.match_cluster_labels(a, b)
array([0, 0, 0, 1, 1, 1, 1, 1, 1, 1])
However, the mapping will work even when cluster assignments between the
@@ -80,13 +86,13 @@ def match_cluster_labels(source, target):
>>> a = np.array([0, 0, 0, 2, 2, 2, 2, 1, 1, 1])
>>> b = np.array([1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
- >>> cluster.match_cluster_labels(a, b)
+ >>> modularity.match_cluster_labels(a, b)
array([1, 1, 1, 0, 0, 0, 0, 2, 2, 2])
If the source assignment has fewer clusters than the target the returned
values may be discontinuous:
- >>> cluster.match_cluster_labels(b, a)
+ >>> modularity.match_cluster_labels(b, a)
array([0, 0, 0, 2, 2, 2, 2, 2, 2, 2])
"""
# try and match the source to target
@@ -137,7 +143,7 @@ def match_assignments(assignments, target=None, seed=None):
Examples
--------
- >>> from netneurotools import cluster
+ >>> from netneurotools import modularity
First we can construct a matrix of `N` samples clustered `M` times (in this
case, `M` is three) . Since cluster labels are generally arbitrary we can
@@ -157,7 +163,7 @@ def match_assignments(assignments, target=None, seed=None):
of the columns will be randomly picked as the "target" solution, we provide
a `seed` to ensure reproducibility in the selection:
- >>> cluster.match_assignments(assignments, seed=1234)
+ >>> modularity.match_assignments(assignments, seed=1234)
array([[1, 1, 1],
[1, 1, 1],
[1, 1, 1],
@@ -179,7 +185,7 @@ def match_assignments(assignments, target=None, seed=None):
... [1, 2, 0],
... [1, 1, 2],
... [1, 1, 2]])
- >>> cluster.match_assignments(assignments)
+ >>> modularity.match_assignments(assignments)
array([[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
@@ -362,3 +368,305 @@ def find_consensus(assignments, null_func=np.mean, return_agreement=False,
return consensus.astype(int), agreement * (agreement > threshold)
return consensus.astype(int)
+
+
+def consensus_modularity(adjacency, gamma=1, B='modularity',
+ repeats=250, null_func=np.mean, seed=None):
+ """
+ Find community assignments from `adjacency` through consensus.
+
+ Performs `repeats` iterations of community detection on `adjacency` and
+ then uses consensus clustering on the resulting community assignments.
+
+ Parameters
+ ----------
+ adjacency : (N, N) array_like
+ Adjacency matrix (weighted/non-weighted) on which to perform consensus
+ community detection.
+ gamma : float, optional
+ Resolution parameter for modularity maximization. Default: 1
+ B : str or (N, N) array_like, optional
+ Null model to use for consensus clustering. If `str`, must be one of
+ ['modularity', 'potts', 'negative_sym', 'negative_asym']. Default:
+ 'modularity'
+ repeats : int, optional
+ Number of times to repeat Louvain algorithm clustering. Default: 250
+ null_func : callable, optional
+ Function used to generate null model when performing consensus-based
+ clustering. Must accept a 2D array as input and return a single value.
+ Default: `np.mean`
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Default: None
+
+ Returns
+ -------
+ consensus : (N,) np.ndarray
+ Consensus-derived community assignments
+ Q_all : array_like
+ Optimized modularity over all `repeats` community assignments
+ zrand_all : array_like
+ z-Rand score over all pairs of `repeats` community assignment vectors
+
+ References
+ ----------
+ Bassett, D. S., Porter, M. A., Wymbs, N. F., Grafton, S. T., Carlson,
+ J. M., & Mucha, P. J. (2013). Robust detection of dynamic community
+ structure in networks. Chaos: An Interdisciplinary Journal of Nonlinear
+ Science, 23(1), 013142.
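+
+    A minimal usage sketch (assuming `adjacency` is a symmetric, non-negative
+    (N, N) array):
+
+    .. code:: python
+
+        consensus, Q_all, zrand_all = consensus_modularity(
+            adjacency, gamma=1, repeats=100, seed=1234)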
+ """
+ # generate community partitions `repeat` times
+ comms, Q_all = zip(*[bct.community_louvain(adjacency, gamma=gamma, B=B)
+ for i in range(repeats)])
+ comms = np.column_stack(comms)
+
+    # find consensus cluster assignments across all partitioning solutions
+ consensus = find_consensus(comms, null_func=null_func, seed=seed)
+
+ # get z-rand statistics for partition similarity (n.b. can take a while)
+ zrand_all = _zrand_partitions(comms)
+
+ return consensus, np.array(Q_all), zrand_all
+
+
+def _dummyvar(labels):
+ """
+ Generate dummy-coded array from provided community assignment `labels`.
+
+ Parameters
+ ----------
+ labels : (N,) array_like
+ Labels assigning `N` samples to `G` groups
+
+ Returns
+ -------
+ ci : (N, G) numpy.ndarray
+ Dummy-coded array where 1 indicates that a sample belongs to a group
+ """
+ comms = np.unique(labels)
+
+ ci = np.zeros((len(labels), len(comms)))
+ for n, grp in enumerate(comms):
+ ci[:, n] = labels == grp
+
+ return ci
+
+
+def zrand(X, Y):
+ """
+ Calculate the z-Rand index of two community assignments.
+
+ Parameters
+ ----------
+ X, Y : (n, 1) array_like
+ Community assignment vectors to compare
+
+ Returns
+ -------
+ z_rand : float
+ Z-rand index
+
+ References
+ ----------
+ Amanda L. Traud, Eric D. Kelsic, Peter J. Mucha, and Mason A. Porter.
+ (2011). Comparing Community Structure to Characteristics in Online
+ Collegiate Social Networks. SIAM Review, 53, 526-543.
+ """
+ if X.ndim > 1 or Y.ndim > 1:
+ if X.shape[-1] > 1 or Y.shape[-1] > 1:
+ raise ValueError('X and Y must have only one-dimension each. '
+ 'Please check inputs.')
+
+ Xf = X.flatten()
+ Yf = Y.flatten()
+
+ n = len(Xf)
+ indx, indy = _dummyvar(Xf), _dummyvar(Yf)
+ Xa = indx.dot(indx.T)
+ Ya = indy.dot(indy.T)
+
+ M = n * (n - 1) / 2
+ M1 = Xa.nonzero()[0].size / 2
+ M2 = Ya.nonzero()[0].size / 2
+
+ wab = np.logical_and(Xa, Ya).nonzero()[0].size / 2
+
+ mod = n * (n**2 - 3 * n - 2)
+ C1 = mod - (8 * (n + 1) * M1) + (4 * np.power(indx.sum(0), 3).sum())
+ C2 = mod - (8 * (n + 1) * M2) + (4 * np.power(indy.sum(0), 3).sum())
+
+ a = M / 16
+ b = ((4 * M1 - 2 * M)**2) * ((4 * M2 - 2 * M)**2) / (256 * (M**2))
+ c = C1 * C2 / (16 * n * (n - 1) * (n - 2))
+ d = ((((4 * M1 - 2 * M)**2) - (4 * C1) - (4 * M))
+ * (((4 * M2 - 2 * M)**2) - (4 * C2) - (4 * M))
+ / (64 * n * (n - 1) * (n - 2) * (n - 3)))
+
+ sigw2 = a - b + c + d
+ # catch any negatives
+ if sigw2 < 0:
+ return 0
+ z_rand = (wab - ((M1 * M2) / M)) / np.sqrt(sigw2)
+
+ return z_rand
+
+
+def _zrand_partitions(communities):
+ """
+ Calculate z-Rand for all pairs of assignments in `communities`.
+
+ Iterates through every pair of community assignment vectors in
+ `communities` and calculates the z-Rand score to assess their similarity.
+
+ Parameters
+ ----------
+ communities : (S, R) array_like
+ Community assignments for `S` samples over `R` partitions
+
+ Returns
+ -------
+ all_zrand : array_like
+ z-Rand score over all pairs of `R` partitions of community assignments
+ """
+ n_partitions = communities.shape[-1]
+ all_zrand = np.zeros(int(n_partitions * (n_partitions - 1) / 2))
+
+ for c1 in prange(n_partitions):
+ for c2 in prange(c1 + 1, n_partitions):
+ idx = int((c1 * n_partitions) + c2 - ((c1 + 1) * (c1 + 2) // 2))
+ all_zrand[idx] = zrand(communities[:, c1], communities[:, c2])
+
+ return all_zrand
+
+
+if use_numba:
+ _dummyvar = njit(_dummyvar)
+ zrand = njit(zrand)
+ _zrand_partitions = njit(_zrand_partitions, parallel=True)
+
+
+def get_modularity(adjacency, comm, gamma=1):
+ """
+ Calculate modularity contribution for each community in `comm`.
+
+ Parameters
+ ----------
+ adjacency : (N, N) array_like
+ Adjacency (e.g., correlation) matrix
+ comm : (N,) array_like
+ Community assignment vector splitting `N` subjects into `G` groups
+ gamma : float, optional
+ Resolution parameter used in original modularity maximization.
+ Default: 1
+
+ Returns
+ -------
+ comm_q : (G,) ndarray
+ Relative modularity for each community
+
+ See Also
+ --------
+ netneurotools.modularity.get_modularity_z
+ netneurotools.modularity.get_modularity_sig
+ """
+ adjacency, comm = np.asarray(adjacency), np.asarray(comm)
+ s = adjacency.sum()
+ B = adjacency - (gamma * np.outer(adjacency.sum(axis=1),
+ adjacency.sum(axis=0)) / s)
+
+ # find modularity contribution of each community
+ communities = np.unique(comm)
+ comm_q = np.empty(shape=communities.size)
+ for n, ci in enumerate(communities):
+ inds = comm == ci
+ comm_q[n] = B[np.ix_(inds, inds)].sum() / s
+
+ return comm_q
+
+
+def get_modularity_z(adjacency, comm, gamma=1, n_perm=10000, seed=None):
+ """
+ Calculate average z-score of community assignments by permutation.
+
+ Parameters
+ ----------
+ adjacency : (N, N) array_like
+ Adjacency (correlation) matrix
+ comm : (N,) array_like
+ Community assignment vector splitting `N` subjects into `G` groups
+ gamma : float, optional
+ Resolution parameter used in original modularity maximization.
+ Default: 1
+ n_perm : int, optional
+ Number of permutations. Default: 10000
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Default: None
+
+ Returns
+ -------
+ q_z : float
+ Average Z-score of modularity of communities
+
+ See Also
+ --------
+ netneurotools.modularity.get_modularity
+ netneurotools.modularity.get_modularity_sig
+ """
+ rs = check_random_state(seed)
+
+ real_qs = get_modularity(adjacency, comm, gamma)
+ simu_qs = np.empty(shape=(np.unique(comm).size, n_perm))
+ for perm in range(n_perm):
+ simu_qs[:, perm] = get_modularity(adjacency,
+ rs.permutation(comm),
+ gamma)
+
+    # avoid instances where simu_qs.std(axis=1) == 0
+    std = simu_qs.std(axis=1)
+    if np.any(std == 0):
+        return np.mean(real_qs - simu_qs.mean(axis=1))
+    else:
+        return np.mean((real_qs - simu_qs.mean(axis=1)) / std)
+
+
+def get_modularity_sig(adjacency, comm, gamma=1, n_perm=10000, alpha=0.01,
+ seed=None):
+ """
+ Calculate significance of community assignments in `comm` by permutation.
+
+ Parameters
+ ----------
+ adjacency : (N, N) array_like
+ Adjacency (correlation) matrix
+ comm : (N,) array_like
+ Community assignment vector
+ gamma : float
+ Resolution parameter used in original modularity maximization
+ n_perm : int, optional
+ Number of permutations to test against. Default: 10000
+ alpha : (0,1) float, optional
+ Alpha level to assess significance. Default: 0.01
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Default: None
+
+ Returns
+ -------
+ ndarray
+ Significance of each community in `comm` (boolean)
+
+ See Also
+ --------
+    netneurotools.modularity.get_modularity
+    netneurotools.modularity.get_modularity_z
+ """
+ rs = check_random_state(seed)
+
+ real_qs = get_modularity(adjacency, comm, gamma)
+ simu_qs = np.empty(shape=(np.unique(comm).size, n_perm))
+ for perm in range(n_perm):
+ simu_qs[:, perm] = get_modularity(adjacency,
+ rs.permutation(comm),
+ gamma)
+
+ q_sig = real_qs > np.percentile(simu_qs, 100 * (1 - alpha), axis=1)
+
+ return q_sig
diff --git a/netneurotools/modularity/tests/__init__.py b/netneurotools/modularity/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/tests/test_cluster.py b/netneurotools/modularity/tests/test_modules.py
similarity index 58%
rename from netneurotools/tests/test_cluster.py
rename to netneurotools/modularity/tests/test_modules.py
index 59b9f8a..64248b9 100644
--- a/netneurotools/tests/test_cluster.py
+++ b/netneurotools/modularity/tests/test_modules.py
@@ -1,12 +1,13 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.cluster functionality."""
+"""For testing netneurotools.modularity.modules functionality."""
import bct
-import numpy as np
import pytest
+import numpy as np
from sklearn.cluster import k_means, spectral_clustering
-from netneurotools import cluster
+from netneurotools import modularity
+
+rs = np.random.RandomState(1234)
@pytest.mark.parametrize('c1, c2, out', [
@@ -28,10 +29,12 @@
np.array([1, 1, 1, 3, 3, 3, 2, 2, 2]))
])
def test_match_cluster_labels(c1, c2, out):
- assert np.all(cluster.match_cluster_labels(c1, c2) == out)
+ """Test matching of cluster labels."""
+ assert np.all(modularity.match_cluster_labels(c1, c2) == out)
def test_match_assignments():
+ """Test matching of clustering assignments."""
# generate some random data to be clustered (must be symmetric)
rs = np.random.RandomState(1234)
data = rs.rand(100, 100)
@@ -48,7 +51,7 @@ def test_match_assignments():
# match labels and assert that we got perfect matches (this is not 100%
# guaranteed with spectral clustering but it is...pretty likely)
- matched = cluster.match_assignments(assignments, seed=rs)
+ matched = modularity.match_assignments(assignments, seed=rs)
assert np.all(matched[:, [0]] == matched)
# check that we didn't _actually_ change cluster assignments with matching;
@@ -58,6 +61,7 @@ def test_match_assignments():
def test_reorder_assignments():
+ """Test re-ordering of clustering assignments."""
# generate a bunch of ~random(ish) clustering assignments that have a bit
# of consistency but aren't all identical
rs = np.random.RandomState(1234)
@@ -72,11 +76,11 @@ def test_reorder_assignments():
# (we're re-labelling the matrix but k-means does not provide stable
# clustering assignments so we shouldn't get identical assignments even
# after "matching")
- reordered, idx = cluster.reorder_assignments(assignments, seed=1234)
+ reordered, idx = modularity.reorder_assignments(assignments, seed=1234)
assert not np.all(reordered[:, [0]] == reordered)
# make sure that the returned idx does exactly what it's supposed to
- matched = cluster.match_assignments(assignments, seed=1234)[idx]
+ matched = modularity.match_assignments(assignments, seed=1234)[idx]
assert np.all(matched == reordered)
@@ -87,4 +91,44 @@ def test_reorder_assignments():
np.array([1, 1, 1, 2, 2, 2, 3, 3, 3]))
])
def test_find_consensus(assignments, clusters):
- assert np.all(cluster.find_consensus(assignments) == clusters)
+ """Test finding consensus clustering."""
+ assert np.all(modularity.find_consensus(assignments) == clusters)
+
+
+def test_dummyvar():
+ """Test generation of dummy variables."""
+ # generate small example dummy variable code
+ out = modularity._dummyvar(np.array([1, 1, 2, 3, 3]))
+ assert np.all(out == np.array([[1, 0, 0],
+ [1, 0, 0],
+ [0, 1, 0],
+ [0, 0, 1],
+ [0, 0, 1]]))
+
+ allones = np.array([1, 1, 1, 1, 1, 1, 1, 1])
+ assert np.all(modularity._dummyvar(allones) == allones)
+
+
+def test_zrand():
+ """Test calculation of zrand."""
+ # make the same two-group community assignments (with different labels)
+ label = np.ones((100, 1))
+ X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label))
+ # compare
+ assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1])
+ random = rs.choice([0, 1], size=X.shape)
+ assert modularity.zrand(X, Y) > modularity.zrand(X, random)
+ assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0])
+
+
+def test_zrand_partitions():
+ """Test calculation of zrand for partitions."""
+ # make random communities
+ comm = rs.choice(range(6), size=(10, 100))
+ all_diff = modularity._zrand_partitions(comm)
+ all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1))
+
+ # partition of labels that are all the same should have higher average
+ # zrand and lower stdev zrand
+ assert np.nanmean(all_same) > np.nanmean(all_diff)
+ assert np.nanstd(all_same) < np.nanstd(all_diff)
diff --git a/netneurotools/networks.py b/netneurotools/networks.py
deleted file mode 100644
index 1803f8c..0000000
--- a/netneurotools/networks.py
+++ /dev/null
@@ -1,1222 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Functions for generating group-level networks from individual measurements."""
-
-import bct
-import numpy as np
-from tqdm import tqdm
-from scipy.sparse import csgraph
-from sklearn.utils.validation import (check_random_state, check_array,
- check_consistent_length)
-
-from . import utils
-
-try:
- from numba import njit
- use_numba = True
-except ImportError:
- use_numba = False
-
-
-def func_consensus(data, n_boot=1000, ci=95, seed=None):
- """
- Calculate thresholded group consensus functional connectivity graph.
-
- This function concatenates all time series in `data` and computes a group
- correlation matrix based on this extended time series. It then generates
- length `T` bootstrapped samples from the concatenated matrix and estimates
- confidence intervals for all correlations. Correlations whose sign is
- consistent across bootstraps are retained; inconsistent correlations are
- set to zero.
-
- If `n_boot` is set to 0 or None a simple, group-averaged functional
- connectivity matrix is estimated, instead.
-
- Parameters
- ----------
- data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T))
- Pre-processed functional time series, where `N` is the number of nodes,
- `T` is the number of volumes in the time series, and `S` is the number
- of subjects.
- n_boot : int, optional
- Number of bootstraps for which to generate correlation. Default: 1000
- ci : (0, 100) float, optional
- Confidence interval for which to assess the reliability of correlations
- with bootstraps. Default: 95
- seed : int, optional
- Random seed. Default: None
-
- Returns
- -------
- consensus : (N, N) numpy.ndarray
- Thresholded, group-level correlation matrix
-
- References
- ----------
- Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann,
- P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and
- competitive spreading dynamics on the human connectome. Neuron, 86(6),
- 1518-1529.
- """
- # check inputs
- rs = check_random_state(seed)
- if ci > 100 or ci < 0:
- raise ValueError("`ci` must be between 0 and 100.")
-
- # group-average functional connectivity matrix desired instead of bootstrap
- if n_boot == 0 or n_boot is None:
- if isinstance(data, list):
- corrs = [np.corrcoef(sub) for sub in data]
- else:
- corrs = [np.corrcoef(data[..., sub]) for sub in
- range(data.shape[-1])]
- return np.nanmean(corrs, axis=0)
-
- if isinstance(data, list):
- collapsed_data = np.hstack(data)
- nsample = int(collapsed_data.shape[-1] / len(data))
- else:
- collapsed_data = data.reshape((len(data), -1), order='F')
- nsample = data.shape[1]
-
- consensus = np.corrcoef(collapsed_data)
-
- # only keep the upper triangle for the bootstraps to save on memory usage
- triu_inds = np.triu_indices_from(consensus, k=1)
- bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot))
-
- # generate `n_boot` bootstrap correlation matrices by sampling `t` time
- # points from the concatenated time series
- for boot in range(n_boot):
- inds = rs.randint(collapsed_data.shape[-1], size=nsample)
- bootstrapped_corrmat[..., boot] = \
- np.corrcoef(collapsed_data[:, inds])[triu_inds]
-
- # extract the CIs from the bootstrapped correlation matrices
- # we don't need the input anymore so overwrite it
- bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci],
- axis=-1, overwrite_input=True)
-
- # remove unreliable (i.e., CI zero-crossing) correlations
- # if the signs of the bootstrapped confidence intervals are different
- # (i.e., their signs sum to 0), then we want to remove them
- # so, take the logical not of the CI (CI = 0 ---> True) and create a mask
- # then, set all connections from the consensus array inside the mask to 0
- remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0))
- mask = np.zeros_like(consensus, dtype=bool)
- mask[triu_inds] = remove_inds
- consensus[mask + mask.T] = 0
-
- return consensus
-
-
-def _ecdf(data):
- """
- Estimate empirical cumulative distribution function of `data`.
-
- Taken directly from StackOverflow. See original answer at
- https://stackoverflow.com/questions/33345780.
-
- Parameters
- ----------
- data : array_like
-
- Returns
- -------
- prob : numpy.ndarray
- Cumulative probability
- quantiles : numpy.darray
- Quantiles
- """
- sample = np.atleast_1d(data)
-
- # find the unique values and their corresponding counts
- quantiles, counts = np.unique(sample, return_counts=True)
-
- # take the cumulative sum of the counts and divide by the sample size to
- # get the cumulative probabilities between 0 and 1
- prob = np.cumsum(counts).astype(float) / sample.size
-
- # match MATLAB
- prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles)
-
- return prob, quantiles
-
-
-def struct_consensus(data, distance, hemiid,
- conn_num_inter=None,
- conn_num_intra=None,
- weighted=False):
- """
- Calculate distance-dependent group consensus structural connectivity graph.
-
- Takes as input a weighted stack of connectivity matrices with dimensions
- (N, N, S) where `N` is the number of nodes and `S` is the number of
- matrices or subjects. The matrices must be weighted, and ideally with
- continuous weights (e.g. fractional anisotropy rather than streamline
- count). The second input is a pairwise distance matrix, where distance(i,j)
- is the Euclidean distance between nodes i and j. The final input is an
- (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or
- left (`hemiid=1`) hemisphere (note that these values can be flipped as long
- as `hemiid` contains only values of 0 and 1).
-
- This function estimates the average edge length distribution and builds
- a group-averaged connectivity matrix that approximates this distribution
- with density equal to the mean density across subjects.
-
- The algorithm works as follows:
-
- 1. Estimate the cumulative edge length distribution,
- 2. Divide the distribution into M length bins, one for each edge that will
- be added to the group-average matrix, and
- 3. Within each bin, select the edge that is most consistently expressed
- expressed across subjects, breaking ties according to average edge
- weight (which is why the input matrix `data` must be weighted).
-
- The algorithm works separately on within/between hemisphere links.
- M is the sum of `conn_num_inter` and `conn_num_intra`, if provided.
- Otherwise, M is estimated from the data.
-
- Parameters
- ----------
- data : (N, N, S) array_like
- Weighted connectivity matrices (i.e., fractional anisotropy), where `N`
- is nodes and `S` is subjects
- distance : (N, N) array_like
- Array where `distance[i, j]` is the Euclidean distance between nodes
- `i` and `j`
- hemiid : (N, 1) array_like
- Hemisphere designation for `N` nodes where a value of 0/1 indicates
- node `N_{i}` is in the right/left hemisphere, respectively
- conn_num_inter : int, optional
- Number of inter-hemispheric connections to include in the consensus
- matrix. If `None`, the number of inter-hemispheric connections will be
- estimated from the data. Default = `None`.
- conn_num_intra : int, optional
- Number of intra-hemispheric connections to include in the consensus
- matrix. If `None`, the number of intra-hemispheric connections will be
- estimated from the data. Default = `None`.
- weighted : bool
- Flag indicating whether or not to return a weighted consensus map. If
- `True`, the consensus will be multiplied by the mean of `data`.
-
- Returns
- -------
- consensus : (N, N) numpy.ndarray
- Binary (default) or mean-weighted group-level connectivity matrix
-
- References
- ----------
- Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). Distance-
- dependent consensus thresholds for generating group-representative
- structural brain networks. Network Neuroscience, 1-22.
- """
- # confirm input shapes are as expected
- check_consistent_length(data, distance, hemiid)
- try:
- hemiid = check_array(hemiid, ensure_2d=True)
- except ValueError:
- raise ValueError('Provided hemiid must be a 2D array. Reshape your '
- 'data using array.reshape(-1, 1) and try again.') from None
-
- num_node, _, num_sub = data.shape # info on connectivity matrices
- pos_data = data > 0 # location of + values in matrix
- pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node
-
- with np.errstate(divide='ignore', invalid='ignore'):
- average_weights = data.sum(axis=2) / pos_data_count
-
- # empty array to hold inter/intra hemispheric connections
- consensus = np.zeros((num_node, num_node, 2))
-
- for conn_type in range(2): # iterate through inter/intra hemisphere conn
- if conn_type == 0: # get inter hemisphere edges
- inter_hemi = (hemiid == 0) @ (hemiid == 1).T
- keep_conn = np.logical_or(inter_hemi, inter_hemi.T)
- else: # get intra hemisphere edges
- right_hemi = (hemiid == 0) @ (hemiid == 0).T
- left_hemi = (hemiid == 1) @ (hemiid == 1).T
- keep_conn = np.logical_or(right_hemi @ right_hemi.T,
- left_hemi @ left_hemi.T)
-
- # mask the distance array for only those edges we want to examine
- full_dist_conn = distance * keep_conn
- upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn))
-
- # generate array of weighted (by distance), positive edges across subs
- pos_dist = pos_data * upper_dist_conn
- pos_dist = pos_dist[np.nonzero(pos_dist)]
-
- # determine average # of positive edges across subs
- # we will use this to bin the edge weights
- if conn_type == 0:
- if conn_num_inter is None:
- avg_conn_num = len(pos_dist) / num_sub
- else:
- avg_conn_num = conn_num_inter
- else:
- if conn_num_intra is None:
- avg_conn_num = len(pos_dist) / num_sub
- else:
- avg_conn_num = conn_num_intra
-
- # estimate empirical CDF of weighted, positive edges across subs
- cumprob, quantiles = _ecdf(pos_dist)
- cumprob = np.round(cumprob * avg_conn_num).astype(int)
-
- # empty array to hold group-average matrix for current connection type
- # (i.e., inter/intra hemispheric connections)
- group_conn_type = np.zeros((num_node, num_node))
-
- # iterate through bins (for edge weights)
- for n in range(1, int(avg_conn_num) + 1):
- # get current quantile of interest
- curr_quant = quantiles[np.logical_and(cumprob >= (n - 1),
- cumprob < n)]
- if curr_quant.size == 0:
- continue
-
- # find edges in distance connectivity matrix w/i current quantile
- mask = np.logical_and(full_dist_conn >= curr_quant.min(),
- full_dist_conn <= curr_quant.max())
- i, j = np.where(np.triu(mask)) # indices of edges of interest
-
- c = pos_data_count[i, j] # get num sub with + values at edges
- w = average_weights[i, j] # get averaged weight of edges
-
- # find locations of edges most commonly represented across subs
- indmax = np.argwhere(c == c.max())
-
- # determine index of most frequent edge; break ties with higher
- # weighted edge
- if indmax.size == 1: # only one edge found
- group_conn_type[i[indmax], j[indmax]] = 1
- else: # multiple edges found
- indmax = indmax[np.argmax(w[indmax])]
- group_conn_type[i[indmax], j[indmax]] = 1
-
- consensus[:, :, conn_type] = group_conn_type
-
- # collapse across hemispheric connections types and make symmetrical array
- consensus = consensus.sum(axis=2)
- consensus = np.logical_or(consensus, consensus.T).astype(int)
-
- if weighted:
- consensus = consensus * np.mean(data, axis=2)
- return consensus
-
-
-def binarize_network(network, retain=10, keep_diag=False):
- """
- Keep top `retain` % of connections in `network` and binarizes.
-
- Uses the upper triangle for determining connection percentage, which may
- result in disconnected nodes. If this behavior is not desired see
- :py:func:`netneurotools.networks.threshold_network`.
-
- Parameters
- ----------
- network : (N, N) array_like
- Input graph
- retain : [0, 100] float, optional
- Percent connections to retain. Default: 10
- keep_diag : bool, optional
- Whether to keep the diagonal instead of setting it to 0. Default: False
-
- Returns
- -------
- binarized : (N, N) numpy.ndarray
- Binarized, thresholded graph
-
- See Also
- --------
- netneurotools.networks.threshold_network
- """
- if retain < 0 or retain > 100:
- raise ValueError('Value provided for `retain` is outside [0, 100]: {}'
- .format(retain))
-
- prctile = 100 - retain
- triu = utils.get_triu(network)
- thresh = np.percentile(triu, prctile, axis=0, keepdims=True)
- binarized = np.array(network > thresh, dtype=int)
-
- if not keep_diag:
- binarized[np.diag_indices(len(binarized))] = 0
-
- return binarized
-
-
-def threshold_network(network, retain=10):
- """
- Keep top `retain` % of connections in `network` and binarizes.
-
- Uses a minimum spanning tree to ensure that no nodes are disconnected from
- the resulting thresholded graph
-
- Parameters
- ----------
- network : (N, N) array_like
- Input graph
- retain : [0, 100] float, optional
- Percent connections to retain. Default: 10
-
- Returns
- -------
- thresholded : (N, N) numpy.ndarray
- Binarized, thresholded graph
-
- See Also
- --------
- netneurotools.networks.binarize_network
- """
- if retain < 0 or retain > 100:
- raise ValueError('Value provided for `retain` must be a percent '
- 'in range [0, 100]. Provided: {}'.format(retain))
-
- # get number of nodes in graph and invert weights (MINIMUM spanning tree)
- nodes = len(network)
- graph = np.triu(network * -1)
-
- # find MST and count # of edges in graph
- mst = csgraph.minimum_spanning_tree(graph).todense()
- mst_edges = np.sum(mst != 0)
-
- # determine # of remaining edges and ensure we're not over the limit
- remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges
- if remain < 0:
- raise ValueError('Minimum spanning tree with {} edges exceeds desired '
- 'connection density of {}% ({} edges). Cannot '
- 'proceed with graph creation.'
- .format(mst_edges, retain, remain + mst_edges))
-
- # zero out edges already in MST and then get indices of next best edges
- graph -= mst
- inds = utils.get_triu(graph).argsort()[:remain]
- inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1))
-
- # add edges to MST, symmetrize, and convert to binary matrix
- mst[inds] = graph[inds]
- mst = np.array((mst + mst.T) != 0, dtype=int)
-
- return mst
-
-
-def match_length_degree_distribution(W, D, nbins=10, nswap=1000,
- replacement=False, weighted=True,
- seed=None):
- """
- Generate degree- and edge length-preserving surrogate connectomes.
-
- Parameters
- ----------
- W : (N, N) array-like
- weighted or binary symmetric connectivity matrix.
- D : (N, N) array-like
- symmetric distance matrix.
- nbins : int
- number of distance bins (edge length matrix is performed by swapping
- connections in the same bin). Default = 10.
- nswap : int
- total number of edge swaps to perform. Recommended = nnodes * 20
- Default = 1000.
- replacement : bool, optional
- if True all the edges are available for swapping. Default= False
- weighted : bool, optional
- Whether to return weighted rewired connectivity matrix. Default = True
- seed : float, optional
- Random seed. Default = None
-
- Returns
- -------
- newB : (N, N) array-like
- binary rewired matrix
- newW: (N, N) array-like
- weighted rewired matrix. Returns matrix of zeros if weighted=False.
- nr : int
- number of successful rewires
-
- Notes
- -----
- Takes a weighted, symmetric connectivity matrix `data` and Euclidean/fiber
- length matrix `distance` and generates a randomized network with:
- 1. exactly the same degree sequence
- 2. approximately the same edge length distribution
- 3. exactly the same edge weight distribution
- 4. approximately the same weight-length relationship
-
- Reference
- ---------
- Betzel, R. F., Bassett, D. S. (2018) Specificity and robustness of
- long-distance connections in weighted, interareal connectomes. PNAS.
-
- """
- rs = check_random_state(seed)
- N = len(W)
- # divide the distances by lengths
- bins = np.linspace(D[D.nonzero()].min(), D[D.nonzero()].max(), nbins + 1)
- bins[-1] += 1
- L = np.zeros((N, N))
- for n in range(nbins):
- i, j = np.where(np.logical_and(bins[n] <= D, D < bins[n + 1]))
- L[i, j] = n + 1
-
- # binarized connectivity
- B = (W != 0).astype(np.int_)
-
- # existing edges (only upper triangular cause it's symmetric)
- cn_x, cn_y = np.where(np.triu((B != 0) * B, k=1))
-
- tries = 0
- nr = 0
- newB = np.copy(B)
-
- while ((len(cn_x) >= 2) & (nr < nswap)):
- # choose randomly the edge to be rewired
- r = rs.randint(len(cn_x))
- n_x, n_y = cn_x[r], cn_y[r]
- tries += 1
-
- # options to rewire with
- # connected nodes that doesn't involve (n_x, n_y)
- index = (cn_x != n_x) & (cn_y != n_y) & (cn_y != n_x) & (cn_x != n_y)
- if len(np.where(index)[0]) == 0:
- cn_x = np.delete(cn_x, r)
- cn_y = np.delete(cn_y, r)
-
- else:
- ops1_x, ops1_y = cn_x[index], cn_y[index]
- # options that will preserve the distances
- # (ops1_x, ops1_y) such that
- # L(n_x,n_y) = L(n_x, ops1_x) & L(ops1_x,ops1_y) = L(n_y, ops1_y)
- index = (L[n_x, n_y] == L[n_x, ops1_x]) & (
- L[ops1_x, ops1_y] == L[n_y, ops1_y])
- if len(np.where(index)[0]) == 0:
- cn_x = np.delete(cn_x, r)
- cn_y = np.delete(cn_y, r)
-
- else:
- ops2_x, ops2_y = ops1_x[index], ops1_y[index]
- # options of edges that didn't exist before
- index = [(newB[min(n_x, ops2_x[i])][max(n_x, ops2_x[i])] == 0)
- & (newB[min(n_y, ops2_y[i])][max(n_y,
- ops2_y[i])] == 0)
- for i in range(len(ops2_x))]
- if (len(np.where(index)[0]) == 0):
- cn_x = np.delete(cn_x, r)
- cn_y = np.delete(cn_y, r)
-
- else:
- ops3_x, ops3_y = ops2_x[index], ops2_y[index]
-
- # choose randomly one edge from the final options
- r1 = rs.randint(len(ops3_x))
- nn_x, nn_y = ops3_x[r1], ops3_y[r1]
-
- # Disconnect the existing edges
- newB[n_x, n_y] = 0
- newB[nn_x, nn_y] = 0
- # Connect the new edges
- newB[min(n_x, nn_x), max(n_x, nn_x)] = 1
- newB[min(n_y, nn_y), max(n_y, nn_y)] = 1
- # one successfull rewire!
- nr += 1
-
- # rewire with replacement
- if replacement:
- cn_x[r], cn_y[r] = min(n_x, nn_x), max(n_x, nn_x)
- index = np.where((cn_x == nn_x) & (cn_y == nn_y))[0]
- cn_x[index], cn_y[index] = min(
- n_y, nn_y), max(n_y, nn_y)
- # rewire without replacement
- else:
- cn_x = np.delete(cn_x, r)
- cn_y = np.delete(cn_y, r)
- index = np.where((cn_x == nn_x) & (cn_y == nn_y))[0]
- cn_x = np.delete(cn_x, index)
- cn_y = np.delete(cn_y, index)
-
- if nr < nswap:
- print(f"I didn't finish, out of rewirable edges: {len(cn_x)}")
-
- i, j = np.triu_indices(N, k=1)
- # Make the connectivity matrix symmetric
- newB[j, i] = newB[i, j]
-
- # check the number of edges is preserved
- if len(np.where(B != 0)[0]) != len(np.where(newB != 0)[0]):
- print(
- f"ERROR --- number of edges changed, \
- B:{len(np.where(B!=0)[0])}, newB:{len(np.where(newB!=0)[0])}")
- # check that the degree of the nodes it's the same
- for i in range(N):
- if np.sum(B[i]) != np.sum(newB[i]):
- print(
- f"ERROR --- node {i} changed k by: \
- {np.sum(B[i]) - np.sum(newB[i])}")
-
- newW = np.zeros((N, N))
- if weighted:
- # Reassign the weights
- mask = np.triu(B != 0, k=1)
- inids = D[mask]
- iniws = W[mask]
- inids_index = np.argsort(inids)
- # Weights from the shortest to largest edges
- iniws = iniws[inids_index]
- mask = np.triu(newB != 0, k=1)
- finds = D[mask]
- i, j = np.where(mask)
- # Sort the new edges from the shortest to the largest
- finds_index = np.argsort(finds)
- i_sort = i[finds_index]
- j_sort = j[finds_index]
- # Assign the initial sorted weights
- newW[i_sort, j_sort] = iniws
- # Make it symmetrical
- newW[j_sort, i_sort] = iniws
-
- return newB, newW, nr
-
-
-def randmio_und(W, itr):
- """
- Optimized version of randmio_und.
-
- This function randomizes an undirected network, while preserving the
- degree distribution. The function does not preserve the strength
- distribution in weighted networks.
-
- This function is significantly faster if numba is enabled, because
- the main overhead is `np.random.randint`, see `here `_
-
- Parameters
- ----------
- W : (N, N) array-like
- Undirected binary/weighted connection matrix
- itr : int
- rewiring parameter. Each edge is rewired approximately itr times.
-
- Returns
- -------
- W : (N, N) array-like
- Randomized network
- eff : int
- number of actual rewirings carried out
- """ # noqa: E501
- W = W.copy()
- n = len(W)
- i, j = np.where(np.triu(W > 0, 1))
- k = len(i)
- itr *= k
-
- # maximum number of rewiring attempts per iteration
- max_attempts = np.round(n * k / (n * (n - 1)))
- # actual number of successful rewirings
- eff = 0
-
- for _ in range(int(itr)):
- att = 0
- while att <= max_attempts: # while not rewired
- while True:
- e1, e2 = np.random.randint(k), np.random.randint(k)
- while e1 == e2:
- e2 = np.random.randint(k)
- a, b = i[e1], j[e1]
- c, d = i[e2], j[e2]
-
- if a != c and a != d and b != c and b != d:
- break # all 4 vertices must be different
-
- # flip edge c-d with 50% probability
- # to explore all potential rewirings
- if np.random.random() > .5:
- i[e2], j[e2] = d, c
- c, d = d, c
-
- # rewiring condition
- # not flipped
- # a--b a b
- # TO X
- # c--d c d
- # if flipped
- # a--b a--b a b
- # TO TO X
- # c--d d--c d c
- if not (W[a, d] or W[c, b]):
- W[a, d] = W[a, b]
- W[a, b] = 0
- W[d, a] = W[b, a]
- W[b, a] = 0
- W[c, b] = W[c, d]
- W[c, d] = 0
- W[b, c] = W[d, c]
- W[d, c] = 0
-
- j[e1] = d
- j[e2] = b # reassign edge indices
- eff += 1
- break
- att += 1
-
- return W, eff
-
-
-if use_numba:
- randmio_und = njit(randmio_und)
-
-
-def strength_preserving_rand_sa(A, rewiring_iter=10,
- nstage=100, niter=10000,
- temp=1000, frac=0.5,
- energy_type='sse', energy_func=None,
- R=None, connected=None,
- verbose=False, seed=None):
- """
- Strength-preserving network randomization using simulated annealing.
-
- Randomize an undirected weighted network, while preserving
- the degree and strength sequences using simulated annealing.
-
- This function allows for a flexible choice of energy function.
-
- Parameters
- ----------
- A : (N, N) array-like
- Undirected weighted connectivity matrix
- rewiring_iter : int, optional
- Rewiring parameter. Default = 10.
- Each edge is rewired approximately rewiring_iter times.
- nstage : int, optional
- Number of annealing stages. Default = 100.
- niter : int, optional
- Number of iterations per stage. Default = 10000.
- temp : float, optional
- Initial temperature. Default = 1000.
- frac : float, optional
- Fractional decrease in temperature per stage. Default = 0.5.
- energy_type: str, optional
- Energy function to minimize. Can be either:
- 'sse': Sum of squared errors between strength sequence vectors
- of the original network and the randomized network
- 'max': Maximum absolute error
- 'mae': Mean absolute error
- 'mse': Mean squared error
- 'rmse': Root mean squared error
- Default = 'sse'.
- energy_func: callable, optional
- Callable with two positional arguments corresponding to
- two strength sequence numpy arrays that returns an energy value.
- Overwrites “energy_type”.
- See “energy_type” for specifying a predefined energy type instead.
- R : (N, N) array-like, optional
- Pre-randomized connectivity matrix.
- If None, a rewired connectivity matrix is generated using the
- Maslov & Sneppen algorithm.
- Default = None.
- connected: bool, optional
- Whether to ensure connectedness of the randomized network.
- By default, this is inferred from data.
- verbose: bool, optional
- Whether to print status to screen at the end of every stage.
- Default = False.
- seed: float, optional
- Random seed. Default = None.
-
- Returns
- -------
- B : (N, N) array-like
- Randomized connectivity matrix
- min_energy : float
- Minimum energy obtained by annealing
-
- Notes
- -----
- Uses Maslov & Sneppen rewiring model to produce a
- surrogate connectivity matrix, B, with the same
- size, density, and degree sequence as A.
- The weights are then permuted to optimize the
- match between the strength sequences of A and B
- using simulated annealing.
-
- This function is adapted from a function written in MATLAB
- by Richard Betzel.
-
- References
- ----------
- Misic, B. et al. (2015) Cooperative and Competitive Spreading Dynamics
- on the Human Connectome. Neuron.
- Milisav, F. et al. (2024) A simulated annealing algorithm for
- randomizing weighted networks.
- """
- try:
- A = np.asarray(A)
- except TypeError as err:
- msg = ('A must be array_like. Received: {}.'.format(type(A)))
- raise TypeError(msg) from err
-
- if frac > 1 or frac <= 0:
- msg = ('frac must be between 0 and 1. '
- 'Received: {}.'.format(frac))
- raise ValueError(msg)
-
- rs = check_random_state(seed)
-
- n = A.shape[0]
- s = np.sum(A, axis=1) #strengths of A
-
- #Maslov & Sneppen rewiring
- if R is None:
- #ensuring connectedness if the original network is connected
- if connected is None:
- connected = False if bct.number_of_components(A) > 1 else True
- if connected:
- B = bct.randmio_und_connected(A, rewiring_iter, seed=seed)[0]
- else:
- B = bct.randmio_und(A, rewiring_iter, seed=seed)[0]
- else:
- B = R.copy()
-
- u, v = np.triu(B, k=1).nonzero() #upper triangle indices
- wts = np.triu(B, k=1)[(u, v)] #upper triangle values
- m = len(wts)
- sb = np.sum(B, axis=1) #strengths of B
-
- if energy_func is not None:
- energy = energy_func(s, sb)
- elif energy_type == 'sse':
- energy = np.sum((s - sb)**2)
- elif energy_type == 'max':
- energy = np.max(np.abs(s - sb))
- elif energy_type == 'mae':
- energy = np.mean(np.abs(s - sb))
- elif energy_type == 'mse':
- energy = np.mean((s - sb)**2)
- elif energy_type == 'rmse':
- energy = np.sqrt(np.mean((s - sb)**2))
- else:
- msg = ("energy_type must be one of 'sse', 'max', "
- "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type))
- raise ValueError(msg)
-
- energymin = energy
- wtsmin = wts.copy()
-
- if verbose:
- print('\ninitial energy {:.5f}'.format(energy))
-
- for istage in tqdm(range(nstage), desc='annealing progress'):
-
- naccept = 0
- for _ in range(niter):
-
- #permutation
- e1 = rs.randint(m)
- e2 = rs.randint(m)
-
- a, b = u[e1], v[e1]
- c, d = u[e2], v[e2]
-
- sb_prime = sb.copy()
- sb_prime[[a, b]] = sb_prime[[a, b]] - wts[e1] + wts[e2]
- sb_prime[[c, d]] = sb_prime[[c, d]] + wts[e1] - wts[e2]
-
- if energy_func is not None:
- energy_prime = energy_func(sb_prime, s)
- elif energy_type == 'sse':
- energy_prime = np.sum((sb_prime - s)**2)
- elif energy_type == 'max':
- energy_prime = np.max(np.abs(sb_prime - s))
- elif energy_type == 'mae':
- energy_prime = np.mean(np.abs(sb_prime - s))
- elif energy_type == 'mse':
- energy_prime = np.mean((sb_prime - s)**2)
- elif energy_type == 'rmse':
- energy_prime = np.sqrt(np.mean((sb_prime - s)**2))
- else:
- msg = ("energy_type must be one of 'sse', 'max', "
- "'mae', 'mse', or 'rmse'. "
- "Received: {}.".format(energy_type))
- raise ValueError(msg)
-
- #permutation acceptance criterion
- if (energy_prime < energy or
- rs.rand() < np.exp(-(energy_prime - energy)/temp)):
- sb = sb_prime.copy()
- wts[[e1, e2]] = wts[[e2, e1]]
- energy = energy_prime
- if energy < energymin:
- energymin = energy
- wtsmin = wts.copy()
- naccept = naccept + 1
-
- #temperature update
- temp = temp*frac
- if verbose:
- print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, '
- 'frac of accepted moves {:.3f}'.format(istage, temp,
- energymin,
- naccept/niter))
-
- B = np.zeros((n, n))
- B[(u, v)] = wtsmin
- B = B + B.T
-
- return B, energymin
-
-
-def strength_preserving_rand_sa_mse_opt(A, rewiring_iter=10,
- nstage=100, niter=10000,
- temp=1000, frac=0.5,
- R=None, connected=None,
- verbose=False, seed=None):
- """
- Strength-preserving network randomization using simulated annealing.
-
- Randomize an undirected weighted network, while preserving
- the degree and strength sequences using simulated annealing.
-
- This function has been optimized for speed but only allows the
- mean squared error energy function.
-
- Parameters
- ----------
- A : (N, N) array-like
- Undirected weighted connectivity matrix
- rewiring_iter : int, optional
- Rewiring parameter. Default = 10.
- Each edge is rewired approximately rewiring_iter times.
- nstage : int, optional
- Number of annealing stages. Default = 100.
- niter : int, optional
- Number of iterations per stage. Default = 10000.
- temp : float, optional
- Initial temperature. Default = 1000.
- frac : float, optional
- Fractional decrease in temperature per stage. Default = 0.5.
- R : (N, N) array-like, optional
- Pre-randomized connectivity matrix.
- If None, a rewired connectivity matrix is generated using the
- Maslov & Sneppen algorithm.
- Default = None.
- connected: bool, optional
- Whether to ensure connectedness of the randomized network.
- By default, this is inferred from data.
- verbose: bool, optional
- Whether to print status to screen at the end of every stage.
- Default = False.
- seed: float, optional
- Random seed. Default = None.
-
- Returns
- -------
- B : (N, N) array-like
- Randomized connectivity matrix
- min_energy : float
- Minimum energy obtained by annealing
-
- Notes
- -----
- Uses Maslov & Sneppen rewiring model to produce a
- surrogate connectivity matrix, B, with the same
- size, density, and degree sequence as A.
- The weights are then permuted to optimize the
- match between the strength sequences of A and B
- using simulated annealing.
-
- This function is adapted from a function written in MATLAB
- by Richard Betzel and was optimized by Vincent Bazinet.
-
- References
- ----------
- Misic, B. et al. (2015) Cooperative and Competitive Spreading Dynamics
- on the Human Connectome. Neuron.
- Milisav, F. et al. (2024) A simulated annealing algorithm for
- randomizing weighted networks.
- """
- try:
- A = np.asarray(A)
- except TypeError as err:
- msg = ('A must be array_like. Received: {}.'.format(type(A)))
- raise TypeError(msg) from err
-
- if frac > 1 or frac <= 0:
- msg = ('frac must be between 0 and 1. '
- 'Received: {}.'.format(frac))
- raise ValueError(msg)
-
- rs = check_random_state(seed)
-
- n = A.shape[0]
- s = np.sum(A, axis=1) #strengths of A
-
- #Maslov & Sneppen rewiring
- if R is None:
- #ensuring connectedness if the original network is connected
- if connected is None:
- connected = False if bct.number_of_components(A) > 1 else True
- if connected:
- B = bct.randmio_und_connected(A, rewiring_iter, seed=seed)[0]
- else:
- B = bct.randmio_und(A, rewiring_iter, seed=seed)[0]
- else:
- B = R.copy()
-
- u, v = np.triu(B, k=1).nonzero() #upper triangle indices
- wts = np.triu(B, k=1)[(u, v)] #upper triangle values
- m = len(wts)
- sb = np.sum(B, axis=1) #strengths of B
-
- energy = np.mean((s - sb)**2)
-
- energymin = energy
- wtsmin = wts.copy()
-
- if verbose:
- print('\ninitial energy {:.5f}'.format(energy))
-
- for istage in tqdm(range(nstage), desc='annealing progress'):
- naccept = 0
- for (e1, e2), prob in zip(rs.randint(m, size=(niter, 2)),
- rs.rand(niter)
- ):
-
- #permutation
- a, b, c, d = u[e1], v[e1], u[e2], v[e2]
- wts_change = wts[e1] - wts[e2]
- delta_energy = (2 * wts_change *
- (2 * wts_change +
- (s[a] - sb[a]) +
- (s[b] - sb[b]) -
- (s[c] - sb[c]) -
- (s[d] - sb[d])
- )
- )/n
-
- #permutation acceptance criterion
- if (delta_energy < 0 or prob < np.e**(-(delta_energy)/temp)):
-
- sb[[a, b]] -= wts_change
- sb[[c, d]] += wts_change
- wts[[e1, e2]] = wts[[e2, e1]]
-
- energy = np.mean((sb - s)**2)
-
- if energy < energymin:
- energymin = energy
- wtsmin = wts.copy()
- naccept = naccept + 1
-
- #temperature update
- temp = temp*frac
- if verbose:
- print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, '
- 'frac of accepted moves {:.3f}'.format(istage, temp,
- energymin,
- naccept/niter))
-
- B = np.zeros((n, n))
- B[(u, v)] = wtsmin
- B = B + B.T
-
- return B, energymin
-
-
-def strength_preserving_rand_sa_dir(A, rewiring_iter=10,
- nstage=100, niter=10000,
- temp=1000, frac=0.5,
- energy_type='sse', energy_func=None,
- connected=True, verbose=False,
- seed=None):
- """
- Strength-preserving network randomization using simulated annealing.
-
- Randomize a directed weighted network, while preserving
- the in- and out-degree and strength sequences using simulated annealing.
-
- Parameters
- ----------
- A : (N, N) array-like
- Directed weighted connectivity matrix
- rewiring_iter : int, optional
- Rewiring parameter. Default = 10.
- Each edge is rewired approximately rewiring_iter times.
- nstage : int, optional
- Number of annealing stages. Default = 100.
- niter : int, optional
- Number of iterations per stage. Default = 10000.
- temp : float, optional
- Initial temperature. Default = 1000.
- frac : float, optional
- Fractional decrease in temperature per stage. Default = 0.5.
- energy_type: str, optional
- Energy function to minimize. Can be either:
- 'sse': Sum of squared errors between strength sequence vectors
- of the original network and the randomized network
- 'max': Maximum absolute error
- 'mae': Mean absolute error
- 'mse': Mean squared error
- 'rmse': Root mean squared error
- Default = 'sse'.
- energy_func: callable, optional
- Callable with two positional arguments corresponding to
- two strength sequence numpy arrays that returns an energy value.
- Overwrites “energy_type”.
- See “energy_type” for specifying a predefined energy type instead.
- connected: bool, optional
- Whether to ensure connectedness of the randomized network.
- Default = True.
- verbose: bool, optional
- Whether to print status to screen at the end of every stage.
- Default = False.
- seed: float, optional
- Random seed. Default = None.
-
- Returns
- -------
- B : (N, N) array-like
- Randomized connectivity matrix
- min_energy : float
- Minimum energy obtained by annealing
-
- Notes
- -----
- Uses Maslov & Sneppen rewiring model to produce a
- surrogate connectivity matrix, B, with the same
- size, density, and in- and out-degree sequences as A.
- The weights are then permuted to optimize the
- match between the strength sequences of A and B
- using simulated annealing.
- Both in- and out-strengths are preserved.
-
- This function is adapted from a function written in MATLAB
- by Richard Betzel.
-
- References
- ----------
- Misic, B. et al. (2015) Cooperative and Competitive Spreading Dynamics
- on the Human Connectome. Neuron.
- Rubinov, M. (2016) Constraints and spandrels of interareal connectomes.
- Nature Communications.
- Milisav, F. et al. (2024) A simulated annealing algorithm for
- randomizing weighted networks.
- """
- try:
- A = np.asarray(A)
- except TypeError as err:
- msg = ('A must be array_like. Received: {}.'.format(type(A)))
- raise TypeError(msg) from err
-
- if frac > 1 or frac <= 0:
- msg = ('frac must be between 0 and 1. '
- 'Received: {}.'.format(frac))
- raise ValueError(msg)
-
- rs = check_random_state(seed)
-
- n = A.shape[0]
- s_in = np.sum(A, axis=0) #in-strengths of A
- s_out = np.sum(A, axis=1) #out-strengths of A
-
- #Maslov & Sneppen rewiring
- if connected:
- B = bct.randmio_dir_connected(A, rewiring_iter, seed=seed)[0]
- else:
- B = bct.randmio_dir(A, rewiring_iter, seed=seed)[0]
-
- u, v = B.nonzero() #nonzero indices of B
- wts = B[(u, v)] #nonzero values of B
- m = len(wts)
- sb_in = np.sum(B, axis=0) #in-strengths of B
- sb_out = np.sum(B, axis=1) #out-strengths of B
-
- if energy_func is not None:
- energy = energy_func(s_in, sb_in) + energy_func(s_out, sb_out)
- elif energy_type == 'sse':
- energy = np.sum((s_in - sb_in)**2) + np.sum((s_out - sb_out)**2)
- elif energy_type == 'max':
- energy = np.max(np.abs(s_in - sb_in)) + np.max(np.abs(s_out - sb_out))
- elif energy_type == 'mae':
- energy= np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out))
- elif energy_type == 'mse':
- energy = np.mean((s_in - sb_in)**2) + np.mean((s_out - sb_out)**2)
- elif energy_type == 'rmse':
- energy = (np.sqrt(np.mean((s_in - sb_in)**2)) +
- np.sqrt(np.mean((s_out - sb_out)**2)))
- else:
- msg = ("energy_type must be one of 'sse', 'max', "
- "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type))
- raise ValueError(msg)
-
- energymin = energy
- wtsmin = wts.copy()
-
- if verbose:
- print('\ninitial energy {:.5f}'.format(energy))
-
- for istage in tqdm(range(nstage), desc='annealing progress'):
-
- naccept = 0
- for _ in range(niter):
-
- #permutation
- e1 = rs.randint(m)
- e2 = rs.randint(m)
-
- a, b = u[e1], v[e1]
- c, d = u[e2], v[e2]
-
- sb_prime_in = sb_in.copy()
- sb_prime_out = sb_out.copy()
- sb_prime_in[b] = sb_prime_in[b] - wts[e1] + wts[e2]
- sb_prime_out[a] = sb_prime_out[a] - wts[e1] + wts[e2]
- sb_prime_in[d] = sb_prime_in[d] - wts[e2] + wts[e1]
- sb_prime_out[c] = sb_prime_out[c] - wts[e2] + wts[e1]
-
- if energy_func is not None:
- energy_prime = (energy_func(sb_prime_in, s_in) +
- energy_func(sb_prime_out, s_out))
- elif energy_type == 'sse':
- energy_prime = (np.sum((sb_prime_in - s_in)**2) +
- np.sum((sb_prime_out - s_out)**2))
- elif energy_type == 'max':
- energy_prime = (np.max(np.abs(sb_prime_in - s_in)) +
- np.max(np.abs(sb_prime_out - s_out)))
- elif energy_type == 'mae':
- energy_prime = (np.mean(np.abs(sb_prime_in - s_in)) +
- np.mean(np.abs(sb_prime_out - s_out)))
- elif energy_type == 'mse':
- energy_prime = (np.mean((sb_prime_in - s_in)**2) +
- np.mean((sb_prime_out - s_out)**2))
- elif energy_type == 'rmse':
- energy_prime = (np.sqrt(np.mean((sb_prime_in - s_in)**2)) +
- np.sqrt(np.mean((sb_prime_out - s_out)**2)))
- else:
- msg = ("energy_type must be one of 'sse', 'max', "
- "'mae', 'mse', or 'rmse'. "
- "Received: {}.".format(energy_type))
- raise ValueError(msg)
-
- #permutation acceptance criterion
- if (energy_prime < energy or
- rs.rand() < np.exp(-(energy_prime - energy)/temp)):
- sb_in = sb_prime_in.copy()
- sb_out = sb_prime_out.copy()
- wts[[e1, e2]] = wts[[e2, e1]]
- energy = energy_prime
- if energy < energymin:
- energymin = energy
- wtsmin = wts.copy()
- naccept = naccept + 1
-
- #temperature update
- temp = temp*frac
- if verbose:
- print('\nstage {:d}, temp {:.5f}, best energy {:.5f}, '
- 'frac of accepted moves {:.3f}'.format(istage, temp,
- energymin,
- naccept/niter))
-
- B = np.zeros((n, n))
- B[(u, v)] = wtsmin
-
- return B, energymin
diff --git a/netneurotools/networks/__init__.py b/netneurotools/networks/__init__.py
new file mode 100644
index 0000000..fb7d82a
--- /dev/null
+++ b/netneurotools/networks/__init__.py
@@ -0,0 +1,33 @@
+"""Functions for constucting networks."""
+
+
+from .consensus import (
+ func_consensus, struct_consensus
+)
+
+
+from .randomize import (
+ randmio_und,
+ match_length_degree_distribution,
+ strength_preserving_rand_sa,
+ strength_preserving_rand_sa_mse_opt,
+ strength_preserving_rand_sa_dir
+)
+
+
+from .networks_utils import (
+ binarize_network, threshold_network, get_triu
+)
+
+
+__all__ = [
+ # consensus
+ 'func_consensus', 'struct_consensus',
+ # generative
+ # randomize
+ 'randmio_und', 'match_length_degree_distribution',
+ 'strength_preserving_rand_sa', 'strength_preserving_rand_sa_mse_opt',
+ 'strength_preserving_rand_sa_dir',
+ # networks_utils
+ 'binarize_network', 'threshold_network', 'get_triu'
+]
diff --git a/netneurotools/networks/consensus.py b/netneurotools/networks/consensus.py
new file mode 100644
index 0000000..cc48baf
--- /dev/null
+++ b/netneurotools/networks/consensus.py
@@ -0,0 +1,294 @@
+"""Functions for generating consensus networks."""
+
+import numpy as np
+from sklearn.utils.validation import (
+ check_random_state, check_array, check_consistent_length
+)
+
+
+def func_consensus(data, n_boot=1000, ci=95, seed=None):
+ """
+ Calculate thresholded group consensus functional connectivity graph.
+
+ This function concatenates all time series in `data` and computes a group
+ correlation matrix based on this extended time series. It then generates
+ length `T` bootstrapped samples from the concatenated matrix and estimates
+ confidence intervals for all correlations. Correlations whose sign is
+ consistent across bootstraps are retained; inconsistent correlations are
+ set to zero.
+
+ If `n_boot` is set to 0 or None a simple, group-averaged functional
+ connectivity matrix is estimated, instead.
+
+ Parameters
+ ----------
+ data : (N, T, S) array_like (or a list of S arrays, each shaped as (N, T))
+ Pre-processed functional time series, where `N` is the number of nodes,
+ `T` is the number of volumes in the time series, and `S` is the number
+ of subjects.
+ n_boot : int, optional
+        Number of bootstraps used to estimate the reliability of
+        correlations. Default: 1000
+    ci : (0, 100) float, optional
+        Confidence interval used to assess the reliability of correlations
+        across bootstraps. Default: 95
+ seed : int, optional
+ Random seed. Default: None
+
+ Returns
+ -------
+ consensus : (N, N) numpy.ndarray
+ Thresholded, group-level correlation matrix
+
+ References
+ ----------
+ Mišić, B., Betzel, R. F., Nematzadeh, A., Goni, J., Griffa, A., Hagmann,
+ P., Flammini, A., Ahn, Y.-Y., & Sporns, O. (2015). Cooperative and
+ competitive spreading dynamics on the human connectome. Neuron, 86(6),
+ 1518-1529.
+ """
+ # check inputs
+ rs = check_random_state(seed)
+ if ci > 100 or ci < 0:
+ raise ValueError("`ci` must be between 0 and 100.")
+
+ # group-average functional connectivity matrix desired instead of bootstrap
+ if n_boot == 0 or n_boot is None:
+ if isinstance(data, list):
+ corrs = [np.corrcoef(sub) for sub in data]
+ else:
+ corrs = [np.corrcoef(data[..., sub]) for sub in
+ range(data.shape[-1])]
+ return np.nanmean(corrs, axis=0)
+
+ if isinstance(data, list):
+ collapsed_data = np.hstack(data)
+ nsample = int(collapsed_data.shape[-1] / len(data))
+ else:
+ collapsed_data = data.reshape((len(data), -1), order='F')
+ nsample = data.shape[1]
+
+ consensus = np.corrcoef(collapsed_data)
+
+ # only keep the upper triangle for the bootstraps to save on memory usage
+ triu_inds = np.triu_indices_from(consensus, k=1)
+ bootstrapped_corrmat = np.zeros((len(triu_inds[0]), n_boot))
+
+ # generate `n_boot` bootstrap correlation matrices by sampling `t` time
+ # points from the concatenated time series
+ for boot in range(n_boot):
+ inds = rs.randint(collapsed_data.shape[-1], size=nsample)
+ bootstrapped_corrmat[..., boot] = \
+ np.corrcoef(collapsed_data[:, inds])[triu_inds]
+
+ # extract the CIs from the bootstrapped correlation matrices
+ # we don't need the input anymore so overwrite it
+ bootstrapped_ci = np.percentile(bootstrapped_corrmat, [100 - ci, ci],
+ axis=-1, overwrite_input=True)
+
+ # remove unreliable (i.e., CI zero-crossing) correlations
+ # if the signs of the bootstrapped confidence intervals are different
+ # (i.e., their signs sum to 0), then we want to remove them
+ # so, take the logical not of the CI (CI = 0 ---> True) and create a mask
+ # then, set all connections from the consensus array inside the mask to 0
+ remove_inds = np.logical_not(np.sign(bootstrapped_ci).sum(axis=0))
+ mask = np.zeros_like(consensus, dtype=bool)
+ mask[triu_inds] = remove_inds
+ consensus[mask + mask.T] = 0
+
+ return consensus
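+
+
+# A minimal usage sketch for `func_consensus` (illustrative only; the array
+# shapes, seed, and values below are made-up placeholders, not package data):
+#
+#     import numpy as np
+#     from netneurotools.networks import func_consensus
+#
+#     ts = np.random.default_rng(1234).random((90, 200, 10))  # (nodes, time, subjects)
+#     fc = func_consensus(ts, n_boot=100, ci=95, seed=1234)   # (90, 90) thresholded matrix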
+
+
+def _ecdf(data):
+ """
+ Estimate empirical cumulative distribution function of `data`.
+
+ Taken directly from StackOverflow. See original answer at
+ https://stackoverflow.com/questions/33345780.
+
+ Parameters
+ ----------
+ data : array_like
+
+ Returns
+ -------
+ prob : numpy.ndarray
+ Cumulative probability
+    quantiles : numpy.ndarray
+ Quantiles
+ """
+ sample = np.atleast_1d(data)
+
+ # find the unique values and their corresponding counts
+ quantiles, counts = np.unique(sample, return_counts=True)
+
+ # take the cumulative sum of the counts and divide by the sample size to
+ # get the cumulative probabilities between 0 and 1
+ prob = np.cumsum(counts).astype(float) / sample.size
+
+ # match MATLAB
+ prob, quantiles = np.append([0], prob), np.append(quantiles[0], quantiles)
+
+ return prob, quantiles
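+
+
+# A small worked example of `_ecdf` (illustrative; easy to verify by hand):
+#
+#     prob, quantiles = _ecdf([1, 2, 2, 3])
+#     # prob      -> [0.0, 0.25, 0.75, 1.0]
+#     # quantiles -> [1, 1, 2, 3]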
+
+
+def struct_consensus(data, distance, hemiid,
+ conn_num_inter=None,
+ conn_num_intra=None,
+ weighted=False):
+ """
+ Calculate distance-dependent group consensus structural connectivity graph.
+
+ Takes as input a weighted stack of connectivity matrices with dimensions
+ (N, N, S) where `N` is the number of nodes and `S` is the number of
+ matrices or subjects. The matrices must be weighted, and ideally with
+ continuous weights (e.g. fractional anisotropy rather than streamline
+ count). The second input is a pairwise distance matrix, where distance(i,j)
+ is the Euclidean distance between nodes i and j. The final input is an
+ (N, 1) vector which labels nodes as belonging to the right (`hemiid==0`) or
+    left (`hemiid==1`) hemisphere (note that these values can be flipped as long
+ as `hemiid` contains only values of 0 and 1).
+
+ This function estimates the average edge length distribution and builds
+ a group-averaged connectivity matrix that approximates this distribution
+ with density equal to the mean density across subjects.
+
+ The algorithm works as follows:
+
+ 1. Estimate the cumulative edge length distribution,
+ 2. Divide the distribution into M length bins, one for each edge that will
+ be added to the group-average matrix, and
+    3. Within each bin, select the edge that is most consistently expressed
+       across subjects, breaking ties according to average edge
+ weight (which is why the input matrix `data` must be weighted).
+
+ The algorithm works separately on within/between hemisphere links.
+ M is the sum of `conn_num_inter` and `conn_num_intra`, if provided.
+ Otherwise, M is estimated from the data.
+
+ Parameters
+ ----------
+ data : (N, N, S) array_like
+ Weighted connectivity matrices (i.e., fractional anisotropy), where `N`
+ is nodes and `S` is subjects
+ distance : (N, N) array_like
+ Array where `distance[i, j]` is the Euclidean distance between nodes
+ `i` and `j`
+ hemiid : (N, 1) array_like
+ Hemisphere designation for `N` nodes where a value of 0/1 indicates
+ node `N_{i}` is in the right/left hemisphere, respectively
+ conn_num_inter : int, optional
+ Number of inter-hemispheric connections to include in the consensus
+ matrix. If `None`, the number of inter-hemispheric connections will be
+ estimated from the data. Default = `None`.
+ conn_num_intra : int, optional
+ Number of intra-hemispheric connections to include in the consensus
+ matrix. If `None`, the number of intra-hemispheric connections will be
+ estimated from the data. Default = `None`.
+ weighted : bool
+ Flag indicating whether or not to return a weighted consensus map. If
+ `True`, the consensus will be multiplied by the mean of `data`.
+
+ Returns
+ -------
+ consensus : (N, N) numpy.ndarray
+ Binary (default) or mean-weighted group-level connectivity matrix
+
+ References
+ ----------
+ Betzel, R. F., Griffa, A., Hagmann, P., & Mišić, B. (2018). Distance-
+ dependent consensus thresholds for generating group-representative
+ structural brain networks. Network Neuroscience, 1-22.
+ """
+ # confirm input shapes are as expected
+ check_consistent_length(data, distance, hemiid)
+ try:
+ hemiid = check_array(hemiid, ensure_2d=True)
+ except ValueError:
+ raise ValueError('Provided hemiid must be a 2D array. Reshape your '
+ 'data using array.reshape(-1, 1) and try again.') from None
+
+ num_node, _, num_sub = data.shape # info on connectivity matrices
+ pos_data = data > 0 # location of + values in matrix
+ pos_data_count = pos_data.sum(axis=2) # num sub with + values at each node
+
+ with np.errstate(divide='ignore', invalid='ignore'):
+ average_weights = data.sum(axis=2) / pos_data_count
+
+ # empty array to hold inter/intra hemispheric connections
+ consensus = np.zeros((num_node, num_node, 2))
+
+ for conn_type in range(2): # iterate through inter/intra hemisphere conn
+ if conn_type == 0: # get inter hemisphere edges
+ inter_hemi = (hemiid == 0) @ (hemiid == 1).T
+ keep_conn = np.logical_or(inter_hemi, inter_hemi.T)
+ else: # get intra hemisphere edges
+ right_hemi = (hemiid == 0) @ (hemiid == 0).T
+ left_hemi = (hemiid == 1) @ (hemiid == 1).T
+ keep_conn = np.logical_or(right_hemi @ right_hemi.T,
+ left_hemi @ left_hemi.T)
+
+ # mask the distance array for only those edges we want to examine
+ full_dist_conn = distance * keep_conn
+ upper_dist_conn = np.atleast_3d(np.triu(full_dist_conn))
+
+ # generate array of weighted (by distance), positive edges across subs
+ pos_dist = pos_data * upper_dist_conn
+ pos_dist = pos_dist[np.nonzero(pos_dist)]
+
+ # determine average # of positive edges across subs
+ # we will use this to bin the edge weights
+ if conn_type == 0:
+ if conn_num_inter is None:
+ avg_conn_num = len(pos_dist) / num_sub
+ else:
+ avg_conn_num = conn_num_inter
+ else:
+ if conn_num_intra is None:
+ avg_conn_num = len(pos_dist) / num_sub
+ else:
+ avg_conn_num = conn_num_intra
+
+ # estimate empirical CDF of weighted, positive edges across subs
+ cumprob, quantiles = _ecdf(pos_dist)
+ cumprob = np.round(cumprob * avg_conn_num).astype(int)
+
+ # empty array to hold group-average matrix for current connection type
+ # (i.e., inter/intra hemispheric connections)
+ group_conn_type = np.zeros((num_node, num_node))
+
+ # iterate through bins (for edge weights)
+ for n in range(1, int(avg_conn_num) + 1):
+ # get current quantile of interest
+ curr_quant = quantiles[np.logical_and(cumprob >= (n - 1),
+ cumprob < n)]
+ if curr_quant.size == 0:
+ continue
+
+ # find edges in distance connectivity matrix w/i current quantile
+ mask = np.logical_and(full_dist_conn >= curr_quant.min(),
+ full_dist_conn <= curr_quant.max())
+ i, j = np.where(np.triu(mask)) # indices of edges of interest
+
+ c = pos_data_count[i, j] # get num sub with + values at edges
+ w = average_weights[i, j] # get averaged weight of edges
+
+ # find locations of edges most commonly represented across subs
+ indmax = np.argwhere(c == c.max())
+
+ # determine index of most frequent edge; break ties with higher
+ # weighted edge
+ if indmax.size == 1: # only one edge found
+ group_conn_type[i[indmax], j[indmax]] = 1
+ else: # multiple edges found
+ indmax = indmax[np.argmax(w[indmax])]
+ group_conn_type[i[indmax], j[indmax]] = 1
+
+ consensus[:, :, conn_type] = group_conn_type
+
+ # collapse across hemispheric connections types and make symmetrical array
+ consensus = consensus.sum(axis=2)
+ consensus = np.logical_or(consensus, consensus.T).astype(int)
+
+ if weighted:
+ consensus = consensus * np.mean(data, axis=2)
+ return consensus
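+
+
+# A minimal usage sketch for `struct_consensus` (illustrative only; the random
+# matrices and the 45/45 hemisphere split below are made-up placeholders):
+#
+#     import numpy as np
+#     from netneurotools.networks import struct_consensus
+#
+#     rng = np.random.default_rng(1234)
+#     sc = rng.random((90, 90, 10))                 # weighted matrices, e.g. FA
+#     sc = (sc + sc.transpose(1, 0, 2)) / 2         # symmetric per subject
+#     dist = rng.random((90, 90))
+#     dist = (dist + dist.T) / 2                    # pairwise distances
+#     hemiid = np.vstack([np.zeros((45, 1)), np.ones((45, 1))])
+#     cons = struct_consensus(sc, dist, hemiid)     # binary (90, 90) consensus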
diff --git a/netneurotools/networks/generative.py b/netneurotools/networks/generative.py
new file mode 100644
index 0000000..7a7bff4
--- /dev/null
+++ b/netneurotools/networks/generative.py
@@ -0,0 +1 @@
+"""Functions for generative network models."""
diff --git a/netneurotools/networks/networks_utils.py b/netneurotools/networks/networks_utils.py
new file mode 100644
index 0000000..5085e55
--- /dev/null
+++ b/netneurotools/networks/networks_utils.py
@@ -0,0 +1,132 @@
+"""Functions for supporting network constuction."""
+
+import numpy as np
+from scipy.sparse import csgraph
+
+
+def get_triu(data, k=1):
+ """
+ Return vectorized version of upper triangle from `data`.
+
+ Parameters
+ ----------
+ data : (N, N) array_like
+ Input data
+ k : int, optional
+ Which diagonal to select from (where primary diagonal is 0). Default: 1
+
+ Returns
+ -------
+    triu : (N * (N - 1) / 2,) numpy.ndarray
+ Upper triangle of `data`
+
+ Examples
+ --------
+    >>> import numpy as np
+    >>> from netneurotools import networks
+
+ >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]])
+ >>> tri = networks.get_triu(X)
+ >>> tri
+ array([0.5 , 0.25, 0.33])
+ """
+ return data[np.triu_indices(len(data), k=k)].copy()
+
+
+def binarize_network(network, retain=10, keep_diag=False):
+ """
+    Keep the top `retain`% of connections in `network` and binarize.
+
+    Uses the upper triangle for determining connection percentage, which may
+    result in disconnected nodes. If this behavior is not desired, see
+ :py:func:`netneurotools.networks.threshold_network`.
+
+ Parameters
+ ----------
+ network : (N, N) array_like
+ Input graph
+ retain : [0, 100] float, optional
+ Percent connections to retain. Default: 10
+ keep_diag : bool, optional
+ Whether to keep the diagonal instead of setting it to 0. Default: False
+
+ Returns
+ -------
+ binarized : (N, N) numpy.ndarray
+ Binarized, thresholded graph
+
+ See Also
+ --------
+ netneurotools.networks.threshold_network
+ """
+ if retain < 0 or retain > 100:
+ raise ValueError(
+ f'Value provided for `retain` is outside [0, 100]: {retain}'
+ )
+
+ prctile = 100 - retain
+ triu = get_triu(network)
+ thresh = np.percentile(triu, prctile, axis=0, keepdims=True)
+ binarized = np.array(network > thresh, dtype=int)
+
+ if not keep_diag:
+ binarized[np.diag_indices(len(binarized))] = 0
+
+ return binarized
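+
+
+# A minimal usage sketch for `binarize_network` (illustrative; the random
+# matrix below is a placeholder):
+#
+#     import numpy as np
+#     rng = np.random.default_rng(1234)
+#     net = rng.random((100, 100))
+#     net = (net + net.T) / 2                 # symmetric weighted graph
+#     adj = binarize_network(net, retain=10)  # ~top 10% of edges, binarized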
+
+
+def threshold_network(network, retain=10):
+ """
+    Keep the top `retain`% of connections in `network` and binarize.
+
+    Uses a minimum spanning tree to ensure that no nodes are disconnected from
+    the resulting thresholded graph.
+
+ Parameters
+ ----------
+ network : (N, N) array_like
+ Input graph
+ retain : [0, 100] float, optional
+ Percent connections to retain. Default: 10
+
+ Returns
+ -------
+ thresholded : (N, N) numpy.ndarray
+ Binarized, thresholded graph
+
+ See Also
+ --------
+ netneurotools.networks.binarize_network
+ """
+ if retain < 0 or retain > 100:
+ raise ValueError(
+ f'Value provided for `retain` must be a percent '
+ f'in range [0, 100]. Provided: {retain}'
+ )
+
+ # get number of nodes in graph and invert weights (MINIMUM spanning tree)
+ nodes = len(network)
+ graph = np.triu(network * -1)
+
+ # find MST and count # of edges in graph
+ mst = csgraph.minimum_spanning_tree(graph).todense()
+ mst_edges = np.sum(mst != 0)
+
+ # determine # of remaining edges and ensure we're not over the limit
+ remain = int((retain / 100) * ((nodes * (nodes - 1)) / 2)) - mst_edges
+ if remain < 0:
+ raise ValueError(
+ f'Minimum spanning tree with {mst_edges} edges exceeds desired '
+ f'connection density of {retain}% ({remain + mst_edges} edges). Cannot '
+ f'proceed with graph creation.'
+ )
+
+ # zero out edges already in MST and then get indices of next best edges
+ graph -= mst
+ inds = get_triu(graph).argsort()[:remain]
+ inds = tuple(e[inds] for e in np.triu_indices_from(graph, k=1))
+
+ # add edges to MST, symmetrize, and convert to binary matrix
+ mst[inds] = graph[inds]
+ mst = np.array((mst + mst.T) != 0, dtype=int)
+
+ return mst
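+
+
+# A minimal usage sketch for `threshold_network` (illustrative; the random
+# matrix below is a placeholder). Unlike `binarize_network`, the minimum
+# spanning tree guarantees that no node ends up disconnected:
+#
+#     import numpy as np
+#     rng = np.random.default_rng(1234)
+#     net = rng.random((100, 100))
+#     net = (net + net.T) / 2
+#     adj = threshold_network(net, retain=10)
+#     assert adj.sum(axis=0).min() > 0        # every node keeps at least one edge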
diff --git a/netneurotools/networks/randomize.py b/netneurotools/networks/randomize.py
new file mode 100644
index 0000000..9778ea7
--- /dev/null
+++ b/netneurotools/networks/randomize.py
@@ -0,0 +1,873 @@
+"""Functions for generating randomized networks."""
+
+import bct
+import numpy as np
+from tqdm import tqdm
+from sklearn.utils.validation import check_random_state
+
+try:
+ from numba import njit
+
+ use_numba = True
+except ImportError:
+ use_numba = False
+
+
+def randmio_und(W, itr):
+ """
+ Optimized version of randmio_und.
+
+ This function randomizes an undirected network, while preserving the
+ degree distribution. The function does not preserve the strength
+ distribution in weighted networks.
+
+ This function is significantly faster if numba is enabled, because
+ the main overhead is `np.random.randint`, see `here `_
+
+ Parameters
+ ----------
+ W : (N, N) array-like
+ Undirected binary/weighted connection matrix
+ itr : int
+ rewiring parameter. Each edge is rewired approximately itr times.
+
+ Returns
+ -------
+ W : (N, N) array-like
+ Randomized network
+ eff : int
+ number of actual rewirings carried out
+ """ # noqa: E501
+ W = W.copy()
+ n = len(W)
+ i, j = np.where(np.triu(W > 0, 1))
+ k = len(i)
+ itr *= k
+
+ # maximum number of rewiring attempts per iteration
+ max_attempts = np.round(n * k / (n * (n - 1)))
+ # actual number of successful rewirings
+ eff = 0
+
+ for _ in range(int(itr)):
+ att = 0
+ while att <= max_attempts: # while not rewired
+ while True:
+ e1, e2 = np.random.randint(k), np.random.randint(k)
+ while e1 == e2:
+ e2 = np.random.randint(k)
+ a, b = i[e1], j[e1]
+ c, d = i[e2], j[e2]
+
+ if a != c and a != d and b != c and b != d:
+ break # all 4 vertices must be different
+
+ # flip edge c-d with 50% probability
+ # to explore all potential rewirings
+ if np.random.random() > 0.5:
+ i[e2], j[e2] = d, c
+ c, d = d, c
+
+ # rewiring condition
+ # not flipped
+ # a--b a b
+ # TO X
+ # c--d c d
+ # if flipped
+ # a--b a--b a b
+ # TO TO X
+ # c--d d--c d c
+ if not (W[a, d] or W[c, b]):
+ W[a, d] = W[a, b]
+ W[a, b] = 0
+ W[d, a] = W[b, a]
+ W[b, a] = 0
+ W[c, b] = W[c, d]
+ W[c, d] = 0
+ W[b, c] = W[d, c]
+ W[d, c] = 0
+
+ j[e1] = d
+ j[e2] = b # reassign edge indices
+ eff += 1
+ break
+ att += 1
+
+ return W, eff
+
+
+if use_numba:
+ randmio_und = njit(randmio_und)
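+
+
+# A minimal usage sketch for `randmio_und` (illustrative; W below is a random
+# placeholder network):
+#
+#     import numpy as np
+#     rng = np.random.default_rng(1234)
+#     W = rng.random((50, 50)) * (rng.random((50, 50)) < 0.2)
+#     W = np.triu(W, 1) + np.triu(W, 1).T     # sparse, symmetric, weighted
+#     W_rand, eff = randmio_und(W, 10)        # ~10 rewiring passes per edge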
+
+
+def match_length_degree_distribution(
+ W, D, nbins=10, nswap=1000, replacement=False, weighted=True, seed=None
+):
+ """
+ Generate degree- and edge length-preserving surrogate connectomes.
+
+ Parameters
+ ----------
+ W : (N, N) array-like
+ weighted or binary symmetric connectivity matrix.
+ D : (N, N) array-like
+ symmetric distance matrix.
+ nbins : int
+        number of distance bins (swaps are only performed between edges that
+        fall in the same distance bin). Default = 10.
+ nswap : int
+ total number of edge swaps to perform. Recommended = nnodes * 20.
+ Default = 1000.
+    replacement : bool, optional
+        if True, rewired edges remain available for further swaps.
+        Default = False.
+ weighted : bool, optional
+ if True the function returns a weighted matrix. Default = True.
+ seed : float, optional
+ Random seed. Default = None
+
+ Returns
+ -------
+ newB : (N, N) array-like
+ binary rewired matrix
+ newW: (N, N) array-like
+ weighted rewired matrix. Returns matrix of zeros if weighted=False.
+ nr : int
+ number of successful rewires
+
+ Notes
+ -----
+    Takes a weighted, symmetric connectivity matrix `W` and a Euclidean/fiber
+    length matrix `D` and generates a randomized network with:
+ 1. exactly the same degree sequence
+ 2. approximately the same edge length distribution
+ 3. exactly the same edge weight distribution
+ 4. approximately the same weight-length relationship
+
+    References
+    ----------
+ Betzel, R. F., Bassett, D. S. (2018) Specificity and robustness of
+ long-distance connections in weighted, interareal connectomes. PNAS.
+ """
+ rs = check_random_state(seed)
+ N = len(W)
+ # divide the distances by lengths
+ bins = np.linspace(D[D.nonzero()].min(), D[D.nonzero()].max(), nbins + 1)
+ bins[-1] += 1
+ L = np.zeros((N, N))
+ for n in range(nbins):
+ i, j = np.where(np.logical_and(bins[n] <= D, D < bins[n + 1]))
+ L[i, j] = n + 1
+
+ # binarized connectivity
+ B = (W != 0).astype(np.int_)
+
+ # existing edges (only upper triangular cause it's symmetric)
+ cn_x, cn_y = np.where(np.triu((B != 0) * B, k=1))
+
+ tries = 0
+ nr = 0
+ newB = np.copy(B)
+
+ while (len(cn_x) >= 2) & (nr < nswap):
+ # choose randomly the edge to be rewired
+ r = rs.randint(len(cn_x))
+ n_x, n_y = cn_x[r], cn_y[r]
+ tries += 1
+
+ # options to rewire with
+        # connected nodes that don't involve (n_x, n_y)
+ index = (cn_x != n_x) & (cn_y != n_y) & (cn_y != n_x) & (cn_x != n_y)
+ if len(np.where(index)[0]) == 0:
+ cn_x = np.delete(cn_x, r)
+ cn_y = np.delete(cn_y, r)
+
+ else:
+ ops1_x, ops1_y = cn_x[index], cn_y[index]
+ # options that will preserve the distances
+ # (ops1_x, ops1_y) such that
+ # L(n_x,n_y) = L(n_x, ops1_x) & L(ops1_x,ops1_y) = L(n_y, ops1_y)
+ index = (L[n_x, n_y] == L[n_x, ops1_x]) & (
+ L[ops1_x, ops1_y] == L[n_y, ops1_y]
+ )
+ if len(np.where(index)[0]) == 0:
+ cn_x = np.delete(cn_x, r)
+ cn_y = np.delete(cn_y, r)
+
+ else:
+ ops2_x, ops2_y = ops1_x[index], ops1_y[index]
+ # options of edges that didn't exist before
+ index = [
+ (newB[min(n_x, ops2_x[i])][max(n_x, ops2_x[i])] == 0)
+ & (newB[min(n_y, ops2_y[i])][max(n_y, ops2_y[i])] == 0)
+ for i in range(len(ops2_x))
+ ]
+ if len(np.where(index)[0]) == 0:
+ cn_x = np.delete(cn_x, r)
+ cn_y = np.delete(cn_y, r)
+
+ else:
+ ops3_x, ops3_y = ops2_x[index], ops2_y[index]
+
+ # choose randomly one edge from the final options
+ r1 = rs.randint(len(ops3_x))
+ nn_x, nn_y = ops3_x[r1], ops3_y[r1]
+
+ # Disconnect the existing edges
+ newB[n_x, n_y] = 0
+ newB[nn_x, nn_y] = 0
+ # Connect the new edges
+ newB[min(n_x, nn_x), max(n_x, nn_x)] = 1
+ newB[min(n_y, nn_y), max(n_y, nn_y)] = 1
+                    # one successful rewire!
+ nr += 1
+
+ # rewire with replacement
+ if replacement:
+ cn_x[r], cn_y[r] = min(n_x, nn_x), max(n_x, nn_x)
+ index = np.where((cn_x == nn_x) & (cn_y == nn_y))[0]
+ cn_x[index], cn_y[index] = min(n_y, nn_y), max(n_y, nn_y)
+ # rewire without replacement
+ else:
+ cn_x = np.delete(cn_x, r)
+ cn_y = np.delete(cn_y, r)
+ index = np.where((cn_x == nn_x) & (cn_y == nn_y))[0]
+ cn_x = np.delete(cn_x, index)
+ cn_y = np.delete(cn_y, index)
+
+ if nr < nswap:
+ print(f"I didn't finish, out of rewirable edges: {len(cn_x)}")
+
+ i, j = np.triu_indices(N, k=1)
+ # Make the connectivity matrix symmetric
+ newB[j, i] = newB[i, j]
+
+ # check the number of edges is preserved
+ if len(np.where(B != 0)[0]) != len(np.where(newB != 0)[0]):
+ print(
+ f"ERROR --- number of edges changed, \
+ B:{len(np.where(B != 0)[0])}, newB:{len(np.where(newB != 0)[0])}"
+ )
+    # check that the degree of each node is the same
+ for i in range(N):
+ if np.sum(B[i]) != np.sum(newB[i]):
+ print(
+ f"ERROR --- node {i} changed k by: \
+ {np.sum(B[i]) - np.sum(newB[i])}"
+ )
+
+ newW = np.zeros((N, N))
+ if weighted:
+ # Reassign the weights
+ mask = np.triu(B != 0, k=1)
+ inids = D[mask]
+ iniws = W[mask]
+ inids_index = np.argsort(inids)
+ # Weights from the shortest to largest edges
+ iniws = iniws[inids_index]
+ mask = np.triu(newB != 0, k=1)
+ finds = D[mask]
+ i, j = np.where(mask)
+ # Sort the new edges from the shortest to the largest
+ finds_index = np.argsort(finds)
+ i_sort = i[finds_index]
+ j_sort = j[finds_index]
+ # Assign the initial sorted weights
+ newW[i_sort, j_sort] = iniws
+ # Make it symmetrical
+ newW[j_sort, i_sort] = iniws
+
+ return newB, newW, nr
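+
+
+# A minimal usage sketch for `match_length_degree_distribution` (illustrative;
+# W and D below are random placeholders):
+#
+#     import numpy as np
+#     rng = np.random.default_rng(1234)
+#     W = rng.random((60, 60)) * (rng.random((60, 60)) < 0.2)
+#     W = np.triu(W, 1) + np.triu(W, 1).T              # sparse symmetric weights
+#     D = rng.random((60, 60))
+#     D = np.triu(D, 1) + np.triu(D, 1).T              # symmetric "distances"
+#     newB, newW, nr = match_length_degree_distribution(
+#         W, D, nbins=10, nswap=60 * 20, seed=1234     # nswap ~ nnodes * 20
+#     )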
+
+
+def strength_preserving_rand_sa(
+ A,
+ rewiring_iter=10,
+ nstage=100,
+ niter=10000,
+ temp=1000,
+ frac=0.5,
+ energy_type="sse",
+ energy_func=None,
+ R=None,
+ connected=None,
+ verbose=False,
+ seed=None,
+):
+ """
+ Strength-preserving network randomization using simulated annealing.
+
+ Randomize an undirected weighted network, while preserving
+ the degree and strength sequences using simulated annealing.
+
+ This function allows for a flexible choice of energy function.
+
+ Parameters
+ ----------
+ A : (N, N) array-like
+ Undirected weighted connectivity matrix
+ rewiring_iter : int, optional
+ Rewiring parameter. Default = 10.
+ Each edge is rewired approximately rewiring_iter times.
+ nstage : int, optional
+ Number of annealing stages. Default = 100.
+ niter : int, optional
+ Number of iterations per stage. Default = 10000.
+ temp : float, optional
+ Initial temperature. Default = 1000.
+ frac : float, optional
+ Fractional decrease in temperature per stage. Default = 0.5.
+ energy_type: str, optional
+ Energy function to minimize. Can be either:
+ 'sse': Sum of squared errors between strength sequence vectors
+ of the original network and the randomized network
+ 'max': Maximum absolute error
+ 'mae': Mean absolute error
+ 'mse': Mean squared error
+ 'rmse': Root mean squared error
+ Default = 'sse'.
+ energy_func: callable, optional
+ Callable with two positional arguments corresponding to
+ two strength sequence numpy arrays that returns an energy value.
+ Overwrites “energy_type”.
+ See “energy_type” for specifying a predefined energy type instead.
+ R : (N, N) array-like, optional
+ Pre-randomized connectivity matrix.
+ If None, a rewired connectivity matrix is generated using the
+ Maslov & Sneppen algorithm.
+ Default = None.
+ connected: bool, optional
+ Whether to ensure connectedness of the randomized network.
+ By default, this is inferred from data.
+ verbose: bool, optional
+ Whether to print status to screen at the end of every stage.
+ Default = False.
+ seed: float, optional
+ Random seed. Default = None.
+
+ Returns
+ -------
+ B : (N, N) array-like
+ Randomized connectivity matrix
+ min_energy : float
+ Minimum energy obtained by annealing
+
+ Notes
+ -----
+ Uses Maslov & Sneppen rewiring model to produce a
+ surrogate connectivity matrix, B, with the same
+ size, density, and degree sequence as A.
+ The weights are then permuted to optimize the
+ match between the strength sequences of A and B
+ using simulated annealing.
+
+ This function is adapted from a function written in MATLAB
+ by Richard Betzel.
+
+ References
+ ----------
+ Misic, B. et al. (2015) Cooperative and Competitive Spreading Dynamics
+ on the Human Connectome. Neuron.
+ Milisav, F. et al. (2024) A simulated annealing algorithm for
+ randomizing weighted networks.
+ """
+ try:
+ A = np.asarray(A)
+ except TypeError as err:
+ msg = "A must be array_like. Received: {}.".format(type(A))
+ raise TypeError(msg) from err
+
+ if frac > 1 or frac <= 0:
+ msg = "frac must be between 0 and 1. " "Received: {}.".format(frac)
+ raise ValueError(msg)
+
+ rs = check_random_state(seed)
+
+ n = A.shape[0]
+ s = np.sum(A, axis=1) # strengths of A
+
+ # Maslov & Sneppen rewiring
+ if R is None:
+ # ensuring connectedness if the original network is connected
+ if connected is None:
+ connected = False if bct.number_of_components(A) > 1 else True
+ if connected:
+ B = bct.randmio_und_connected(A, rewiring_iter, seed=seed)[0]
+ else:
+ B = bct.randmio_und(A, rewiring_iter, seed=seed)[0]
+ else:
+ B = R.copy()
+
+ u, v = np.triu(B, k=1).nonzero() # upper triangle indices
+ wts = np.triu(B, k=1)[(u, v)] # upper triangle values
+ m = len(wts)
+ sb = np.sum(B, axis=1) # strengths of B
+
+ if energy_func is not None:
+ energy = energy_func(s, sb)
+ elif energy_type == "sse":
+ energy = np.sum((s - sb) ** 2)
+ elif energy_type == "max":
+ energy = np.max(np.abs(s - sb))
+ elif energy_type == "mae":
+ energy = np.mean(np.abs(s - sb))
+ elif energy_type == "mse":
+ energy = np.mean((s - sb) ** 2)
+ elif energy_type == "rmse":
+ energy = np.sqrt(np.mean((s - sb) ** 2))
+ else:
+ msg = (
+ "energy_type must be one of 'sse', 'max', "
+ "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type)
+ )
+ raise ValueError(msg)
+
+ energymin = energy
+ wtsmin = wts.copy()
+
+ if verbose:
+ print("\ninitial energy {:.5f}".format(energy))
+
+ for istage in tqdm(range(nstage), desc="annealing progress"):
+ naccept = 0
+ for _ in range(niter):
+ # permutation
+ e1 = rs.randint(m)
+ e2 = rs.randint(m)
+
+ a, b = u[e1], v[e1]
+ c, d = u[e2], v[e2]
+
+ sb_prime = sb.copy()
+ sb_prime[[a, b]] = sb_prime[[a, b]] - wts[e1] + wts[e2]
+ sb_prime[[c, d]] = sb_prime[[c, d]] + wts[e1] - wts[e2]
+
+ if energy_func is not None:
+ energy_prime = energy_func(sb_prime, s)
+ elif energy_type == "sse":
+ energy_prime = np.sum((sb_prime - s) ** 2)
+ elif energy_type == "max":
+ energy_prime = np.max(np.abs(sb_prime - s))
+ elif energy_type == "mae":
+ energy_prime = np.mean(np.abs(sb_prime - s))
+ elif energy_type == "mse":
+ energy_prime = np.mean((sb_prime - s) ** 2)
+ elif energy_type == "rmse":
+ energy_prime = np.sqrt(np.mean((sb_prime - s) ** 2))
+ else:
+ msg = (
+ "energy_type must be one of 'sse', 'max', "
+ "'mae', 'mse', or 'rmse'. "
+ "Received: {}.".format(energy_type)
+ )
+ raise ValueError(msg)
+
+ # permutation acceptance criterion
+ if energy_prime < energy or rs.rand() < np.exp(
+ -(energy_prime - energy) / temp
+ ):
+ sb = sb_prime.copy()
+ wts[[e1, e2]] = wts[[e2, e1]]
+ energy = energy_prime
+ if energy < energymin:
+ energymin = energy
+ wtsmin = wts.copy()
+ naccept = naccept + 1
+
+ # temperature update
+ temp = temp * frac
+ if verbose:
+ print(
+ "\nstage {:d}, temp {:.5f}, best energy {:.5f}, "
+ "frac of accepted moves {:.3f}".format(
+ istage, temp, energymin, naccept / niter
+ )
+ )
+
+ B = np.zeros((n, n))
+ B[(u, v)] = wtsmin
+ B = B + B.T
+
+ return B, energymin
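+
+
+# A minimal usage sketch for `strength_preserving_rand_sa` (illustrative; A is
+# a random placeholder and the reduced nstage/niter only keep the run short):
+#
+#     import numpy as np
+#     rng = np.random.default_rng(1234)
+#     A = rng.random((40, 40))
+#     A = np.triu(A, 1) + np.triu(A, 1).T
+#     B, e_min = strength_preserving_rand_sa(
+#         A, nstage=10, niter=1000, energy_type='mse', seed=1234
+#     )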
+
+
+def strength_preserving_rand_sa_mse_opt(
+ A,
+ rewiring_iter=10,
+ nstage=100,
+ niter=10000,
+ temp=1000,
+ frac=0.5,
+ R=None,
+ connected=None,
+ verbose=False,
+ seed=None,
+):
+ """
+ Strength-preserving network randomization using simulated annealing.
+
+ Randomize an undirected weighted network, while preserving
+ the degree and strength sequences using simulated annealing.
+
+ This function has been optimized for speed but only allows the
+ mean squared error energy function.
+
+ Parameters
+ ----------
+ A : (N, N) array-like
+ Undirected weighted connectivity matrix
+ rewiring_iter : int, optional
+ Rewiring parameter. Default = 10.
+ Each edge is rewired approximately rewiring_iter times.
+ nstage : int, optional
+ Number of annealing stages. Default = 100.
+ niter : int, optional
+ Number of iterations per stage. Default = 10000.
+ temp : float, optional
+ Initial temperature. Default = 1000.
+ frac : float, optional
+ Fractional decrease in temperature per stage. Default = 0.5.
+ R : (N, N) array-like, optional
+ Pre-randomized connectivity matrix.
+ If None, a rewired connectivity matrix is generated using the
+ Maslov & Sneppen algorithm.
+ Default = None.
+ connected: bool, optional
+ Whether to ensure connectedness of the randomized network.
+ By default, this is inferred from data.
+ verbose: bool, optional
+ Whether to print status to screen at the end of every stage.
+ Default = False.
+ seed: float, optional
+ Random seed. Default = None.
+
+ Returns
+ -------
+ B : (N, N) array-like
+ Randomized connectivity matrix
+ min_energy : float
+ Minimum energy obtained by annealing
+
+ Notes
+ -----
+ Uses Maslov & Sneppen rewiring model to produce a
+ surrogate connectivity matrix, B, with the same
+ size, density, and degree sequence as A.
+ The weights are then permuted to optimize the
+ match between the strength sequences of A and B
+ using simulated annealing.
+
+ This function is adapted from a function written in MATLAB
+ by Richard Betzel and was optimized by Vincent Bazinet.
+
+ References
+ ----------
+ Misic, B. et al. (2015) Cooperative and Competitive Spreading Dynamics
+ on the Human Connectome. Neuron.
+ Milisav, F. et al. (2024) A simulated annealing algorithm for
+ randomizing weighted networks.
+ """
+ try:
+ A = np.asarray(A)
+ except TypeError as err:
+ msg = "A must be array_like. Received: {}.".format(type(A))
+ raise TypeError(msg) from err
+
+ if frac > 1 or frac <= 0:
+ msg = "frac must be between 0 and 1. " "Received: {}.".format(frac)
+ raise ValueError(msg)
+
+ rs = check_random_state(seed)
+
+ n = A.shape[0]
+ s = np.sum(A, axis=1) # strengths of A
+
+ # Maslov & Sneppen rewiring
+ if R is None:
+ # ensuring connectedness if the original network is connected
+ if connected is None:
+ connected = False if bct.number_of_components(A) > 1 else True
+ if connected:
+ B = bct.randmio_und_connected(A, rewiring_iter, seed=seed)[0]
+ else:
+ B = bct.randmio_und(A, rewiring_iter, seed=seed)[0]
+ else:
+ B = R.copy()
+
+ u, v = np.triu(B, k=1).nonzero() # upper triangle indices
+ wts = np.triu(B, k=1)[(u, v)] # upper triangle values
+ m = len(wts)
+ sb = np.sum(B, axis=1) # strengths of B
+
+ energy = np.mean((s - sb) ** 2)
+
+ energymin = energy
+ wtsmin = wts.copy()
+
+ if verbose:
+ print("\ninitial energy {:.5f}".format(energy))
+
+ for istage in tqdm(range(nstage), desc="annealing progress"):
+ naccept = 0
+ for (e1, e2), prob in zip(rs.randint(m, size=(niter, 2)), rs.rand(niter)):
+ # permutation
+ a, b, c, d = u[e1], v[e1], u[e2], v[e2]
+ wts_change = wts[e1] - wts[e2]
+ delta_energy = (
+ 2
+ * wts_change
+ * (
+ 2 * wts_change
+ + (s[a] - sb[a])
+ + (s[b] - sb[b])
+ - (s[c] - sb[c])
+ - (s[d] - sb[d])
+ )
+ ) / n
+
+ # permutation acceptance criterion
+ if delta_energy < 0 or prob < np.e ** (-(delta_energy) / temp):
+ sb[[a, b]] -= wts_change
+ sb[[c, d]] += wts_change
+ wts[[e1, e2]] = wts[[e2, e1]]
+
+ energy = np.mean((sb - s) ** 2)
+
+ if energy < energymin:
+ energymin = energy
+ wtsmin = wts.copy()
+ naccept = naccept + 1
+
+ # temperature update
+ temp = temp * frac
+ if verbose:
+ print(
+ "\nstage {:d}, temp {:.5f}, best energy {:.5f}, "
+ "frac of accepted moves {:.3f}".format(
+ istage, temp, energymin, naccept / niter
+ )
+ )
+
+ B = np.zeros((n, n))
+ B[(u, v)] = wtsmin
+ B = B + B.T
+
+ return B, energymin
+
+
+def strength_preserving_rand_sa_dir(
+ A,
+ rewiring_iter=10,
+ nstage=100,
+ niter=10000,
+ temp=1000,
+ frac=0.5,
+ energy_type="sse",
+ energy_func=None,
+ connected=True,
+ verbose=False,
+ seed=None,
+):
+ """
+ Strength-preserving network randomization using simulated annealing.
+
+ Randomize a directed weighted network, while preserving
+ the in- and out-degree and strength sequences using simulated annealing.
+
+ Parameters
+ ----------
+ A : (N, N) array-like
+ Directed weighted connectivity matrix
+ rewiring_iter : int, optional
+ Rewiring parameter. Default = 10.
+ Each edge is rewired approximately rewiring_iter times.
+ nstage : int, optional
+ Number of annealing stages. Default = 100.
+ niter : int, optional
+ Number of iterations per stage. Default = 10000.
+ temp : float, optional
+ Initial temperature. Default = 1000.
+ frac : float, optional
+ Fractional decrease in temperature per stage. Default = 0.5.
+ energy_type: str, optional
+ Energy function to minimize. Can be either:
+ 'sse': Sum of squared errors between strength sequence vectors
+ of the original network and the randomized network
+ 'max': Maximum absolute error
+ 'mae': Mean absolute error
+ 'mse': Mean squared error
+ 'rmse': Root mean squared error
+ Default = 'sse'.
+ energy_func: callable, optional
+ Callable with two positional arguments corresponding to
+ two strength sequence numpy arrays that returns an energy value.
+        Overrides `energy_type`.
+        See `energy_type` for specifying a predefined energy type instead.
+ connected: bool, optional
+ Whether to ensure connectedness of the randomized network.
+ Default = True.
+ verbose: bool, optional
+ Whether to print status to screen at the end of every stage.
+ Default = False.
+    seed: int or np.random.RandomState instance, optional
+ Random seed. Default = None.
+
+ Returns
+ -------
+ B : (N, N) array-like
+ Randomized connectivity matrix
+ min_energy : float
+ Minimum energy obtained by annealing
+
+ Notes
+ -----
+ Uses Maslov & Sneppen rewiring model to produce a
+ surrogate connectivity matrix, B, with the same
+ size, density, and in- and out-degree sequences as A.
+ The weights are then permuted to optimize the
+ match between the strength sequences of A and B
+ using simulated annealing.
+ Both in- and out-strengths are preserved.
+
+ This function is adapted from a function written in MATLAB
+ by Richard Betzel.
+
+ References
+ ----------
+ Misic, B. et al. (2015) Cooperative and Competitive Spreading Dynamics
+ on the Human Connectome. Neuron.
+ Rubinov, M. (2016) Constraints and spandrels of interareal connectomes.
+ Nature Communications.
+ Milisav, F. et al. (2024) A simulated annealing algorithm for
+ randomizing weighted networks.
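+
+    Examples
+    --------
+    A minimal usage sketch (assuming the function is exposed under
+    ``netneurotools.networks``), run on a small random directed matrix
+    with an empty diagonal:
+
+    >>> import numpy as np
+    >>> from netneurotools import networks
+    >>> rng = np.random.default_rng(1234)
+    >>> A = rng.random((10, 10))
+    >>> np.fill_diagonal(A, 0)
+    >>> B, emin = networks.strength_preserving_rand_sa_dir(
+    ...     A, seed=1234)  # doctest: +SKIP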
+ """
+ try:
+ A = np.asarray(A)
+ except TypeError as err:
+ msg = "A must be array_like. Received: {}.".format(type(A))
+ raise TypeError(msg) from err
+
+ if frac > 1 or frac <= 0:
+ msg = "frac must be between 0 and 1. " "Received: {}.".format(frac)
+ raise ValueError(msg)
+
+ rs = check_random_state(seed)
+
+ n = A.shape[0]
+ s_in = np.sum(A, axis=0) # in-strengths of A
+ s_out = np.sum(A, axis=1) # out-strengths of A
+
+ # Maslov & Sneppen rewiring
+ if connected:
+ B = bct.randmio_dir_connected(A, rewiring_iter, seed=seed)[0]
+ else:
+ B = bct.randmio_dir(A, rewiring_iter, seed=seed)[0]
+
+ u, v = B.nonzero() # nonzero indices of B
+ wts = B[(u, v)] # nonzero values of B
+ m = len(wts)
+ sb_in = np.sum(B, axis=0) # in-strengths of B
+ sb_out = np.sum(B, axis=1) # out-strengths of B
+
+ if energy_func is not None:
+ energy = energy_func(s_in, sb_in) + energy_func(s_out, sb_out)
+ elif energy_type == "sse":
+ energy = np.sum((s_in - sb_in) ** 2) + np.sum((s_out - sb_out) ** 2)
+ elif energy_type == "max":
+ energy = np.max(np.abs(s_in - sb_in)) + np.max(np.abs(s_out - sb_out))
+ elif energy_type == "mae":
+ energy = np.mean(np.abs(s_in - sb_in)) + np.mean(np.abs(s_out - sb_out))
+ elif energy_type == "mse":
+ energy = np.mean((s_in - sb_in) ** 2) + np.mean((s_out - sb_out) ** 2)
+ elif energy_type == "rmse":
+ energy = np.sqrt(np.mean((s_in - sb_in) ** 2)) + np.sqrt(
+ np.mean((s_out - sb_out) ** 2)
+ )
+ else:
+ msg = (
+ "energy_type must be one of 'sse', 'max', "
+ "'mae', 'mse', or 'rmse'. Received: {}.".format(energy_type)
+ )
+ raise ValueError(msg)
+
+ energymin = energy
+ wtsmin = wts.copy()
+
+ if verbose:
+ print("\ninitial energy {:.5f}".format(energy))
+
+ for istage in tqdm(range(nstage), desc="annealing progress"):
+ naccept = 0
+ for _ in range(niter):
+ # permutation
+ e1 = rs.randint(m)
+ e2 = rs.randint(m)
+
+ a, b = u[e1], v[e1]
+ c, d = u[e2], v[e2]
+
+ sb_prime_in = sb_in.copy()
+ sb_prime_out = sb_out.copy()
+ sb_prime_in[b] = sb_prime_in[b] - wts[e1] + wts[e2]
+ sb_prime_out[a] = sb_prime_out[a] - wts[e1] + wts[e2]
+ sb_prime_in[d] = sb_prime_in[d] - wts[e2] + wts[e1]
+ sb_prime_out[c] = sb_prime_out[c] - wts[e2] + wts[e1]
+
+ if energy_func is not None:
+ energy_prime = energy_func(sb_prime_in, s_in) + energy_func(
+ sb_prime_out, s_out
+ )
+ elif energy_type == "sse":
+ energy_prime = np.sum((sb_prime_in - s_in) ** 2) + np.sum(
+ (sb_prime_out - s_out) ** 2
+ )
+ elif energy_type == "max":
+ energy_prime = np.max(np.abs(sb_prime_in - s_in)) + np.max(
+ np.abs(sb_prime_out - s_out)
+ )
+ elif energy_type == "mae":
+ energy_prime = np.mean(np.abs(sb_prime_in - s_in)) + np.mean(
+ np.abs(sb_prime_out - s_out)
+ )
+ elif energy_type == "mse":
+ energy_prime = np.mean((sb_prime_in - s_in) ** 2) + np.mean(
+ (sb_prime_out - s_out) ** 2
+ )
+ elif energy_type == "rmse":
+ energy_prime = np.sqrt(np.mean((sb_prime_in - s_in) ** 2)) + np.sqrt(
+ np.mean((sb_prime_out - s_out) ** 2)
+ )
+ else:
+ msg = (
+ "energy_type must be one of 'sse', 'max', "
+ "'mae', 'mse', or 'rmse'. "
+ "Received: {}.".format(energy_type)
+ )
+ raise ValueError(msg)
+
+ # permutation acceptance criterion
+ if energy_prime < energy or rs.rand() < np.exp(
+ -(energy_prime - energy) / temp
+ ):
+ sb_in = sb_prime_in.copy()
+ sb_out = sb_prime_out.copy()
+ wts[[e1, e2]] = wts[[e2, e1]]
+ energy = energy_prime
+ if energy < energymin:
+ energymin = energy
+ wtsmin = wts.copy()
+ naccept = naccept + 1
+
+ # temperature update
+ temp = temp * frac
+ if verbose:
+ print(
+ "\nstage {:d}, temp {:.5f}, best energy {:.5f}, "
+ "frac of accepted moves {:.3f}".format(
+ istage, temp, energymin, naccept / niter
+ )
+ )
+
+ B = np.zeros((n, n))
+ B[(u, v)] = wtsmin
+
+ return B, energymin
diff --git a/netneurotools/networks/tests/__init__.py b/netneurotools/networks/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/networks/tests/test_consensus.py b/netneurotools/networks/tests/test_consensus.py
new file mode 100644
index 0000000..2c0fc23
--- /dev/null
+++ b/netneurotools/networks/tests/test_consensus.py
@@ -0,0 +1 @@
+"""For testing netneurotools.networks.consensus functionality."""
diff --git a/netneurotools/networks/tests/test_generative.py b/netneurotools/networks/tests/test_generative.py
new file mode 100644
index 0000000..223d4bb
--- /dev/null
+++ b/netneurotools/networks/tests/test_generative.py
@@ -0,0 +1 @@
+"""For testing netneurotools.networks.generative functionality."""
diff --git a/netneurotools/networks/tests/test_networks_utils.py b/netneurotools/networks/tests/test_networks_utils.py
new file mode 100644
index 0000000..7bbcfef
--- /dev/null
+++ b/netneurotools/networks/tests/test_networks_utils.py
@@ -0,0 +1,12 @@
+"""For testing netneurotools.networks.networks_utils functionality."""
+
+import numpy as np
+
+from netneurotools import networks
+
+
+def test_get_triu():
+ """Test that get_triu returns correct values."""
+ arr = np.arange(9).reshape(3, 3)
+ assert np.all(networks.get_triu(arr) == np.array([1, 2, 5]))
+ assert np.all(networks.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8]))
diff --git a/netneurotools/networks/tests/test_randomize.py b/netneurotools/networks/tests/test_randomize.py
new file mode 100644
index 0000000..c2a4be2
--- /dev/null
+++ b/netneurotools/networks/tests/test_randomize.py
@@ -0,0 +1 @@
+"""For testing netneurotools.networks.randomize functionality."""
diff --git a/netneurotools/plotting/__init__.py b/netneurotools/plotting/__init__.py
new file mode 100644
index 0000000..2124fe3
--- /dev/null
+++ b/netneurotools/plotting/__init__.py
@@ -0,0 +1,34 @@
+"""Functions for making pretty plots and whatnot."""
+
+
+from .pysurfer_plotters import (
+ plot_conte69, plot_fslr, plot_fsaverage, plot_fsvertex
+)
+
+
+from .pyvista_plotters import (
+ pv_plot_surface
+)
+
+
+from .mpl_plotters import (
+ _grid_communities, _sort_communities,
+ plot_point_brain, plot_mod_heatmap,
+)
+
+
+from .color_utils import (
+ available_cmaps
+)
+
+__all__ = [
+ # pysurfer_plotters
+ 'plot_conte69', 'plot_fslr', 'plot_fsaverage', 'plot_fsvertex',
+ # pyvista_plotters
+ 'pv_plot_surface',
+ # mpl_plotters
+ '_grid_communities', '_sort_communities',
+ 'plot_point_brain', 'plot_mod_heatmap',
+ # color_utils
+ 'available_cmaps'
+]
diff --git a/netneurotools/colors.py b/netneurotools/plotting/color_utils.py
similarity index 98%
rename from netneurotools/colors.py
rename to netneurotools/plotting/color_utils.py
index 753c601..f9fb3d0 100644
--- a/netneurotools/colors.py
+++ b/netneurotools/plotting/color_utils.py
@@ -1,5 +1,4 @@
-# -*- coding: utf-8 -*-
-"""Useful colormaps."""
+"""Functions for working with colors and colormaps."""
import matplotlib
from matplotlib.colors import LinearSegmentedColormap, ListedColormap
diff --git a/netneurotools/plotting/mpl_plotters.py b/netneurotools/plotting/mpl_plotters.py
new file mode 100644
index 0000000..e5637dd
--- /dev/null
+++ b/netneurotools/plotting/mpl_plotters.py
@@ -0,0 +1,293 @@
+"""Functions for matplotlib-based plotting."""
+
+from typing import Iterable
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+
+
+def _grid_communities(communities):
+ """
+ Generate boundaries of `communities`.
+
+ Parameters
+ ----------
+ communities : array_like
+ Community assignment vector
+
+ Returns
+ -------
+ bounds : list
+ Boundaries of communities
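+
+    Examples
+    --------
+    A small sketch mirroring the accompanying unit test (four nodes in
+    community 0, four in community 1, two in community 2):
+
+    >>> import numpy as np
+    >>> comms = np.array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2])
+    >>> _grid_communities(comms)  # doctest: +SKIP
+    [0, 4, 8, 10]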
+ """
+ communities = np.asarray(communities)
+ if 0 in communities:
+ communities = communities + 1
+
+ comm = communities[np.argsort(communities)]
+ bounds = []
+ for i in np.unique(comm):
+ ind = np.where(comm == i)
+ if len(ind) > 0:
+ bounds.append(np.min(ind))
+
+ bounds.append(len(communities))
+
+ return bounds
+
+
+def _sort_communities(consensus, communities):
+ """
+ Sort `communities` in `consensus` according to strength.
+
+ Parameters
+ ----------
+ consensus : array_like
+ Correlation matrix
+ communities : array_like
+ Community assignments for `consensus`
+
+ Returns
+ -------
+ inds : np.ndarray
+ Index array for sorting `consensus`
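+
+    Examples
+    --------
+    A small sketch mirroring the accompanying unit test:
+
+    >>> import numpy as np
+    >>> data = np.arange(9).reshape(3, 3)
+    >>> _sort_communities(data, [0, 0, 2])  # doctest: +SKIP
+    array([1, 0, 2])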
+ """
+ communities = np.asarray(communities)
+ if 0 in communities:
+ communities = communities + 1
+
+ bounds = _grid_communities(communities)
+ inds = np.argsort(communities)
+
+ for n, f in enumerate(bounds[:-1]):
+ i = inds[f:bounds[n + 1]]
+ cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]]
+ inds[f:bounds[n + 1]] = cco
+
+ return inds
+
+
+def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black',
+ ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None,
+ xlabelrotation=90, ylabelrotation=0, cbar=True,
+ square=True, xticklabels=None, yticklabels=None,
+ mask_diagonal=True, **kwargs):
+ """
+ Plot `data` as heatmap with borders drawn around `communities`.
+
+ Parameters
+ ----------
+ data : (N, N) array_like
+ Correlation matrix
+ communities : (N,) array_like
+ Community assignments for `data`
+ inds : (N,) array_like, optional
+ Index array for sorting `data` within `communities`. If None, these
+ will be generated from `data`. Default: None
+ edgecolor : str, optional
+ Color for lines demarcating community boundaries. Default: 'black'
+ ax : matplotlib.axes.Axes, optional
+ Axis on which to plot the heatmap. If none provided, a new figure and
+ axis will be created. Default: None
+ figsize : tuple, optional
+        Size of figure to create if `ax` is not provided. Default: (6.4, 4.8)
+ {x,y}labels : list, optional
+ List of labels on {x,y}-axis for each community in `communities`. The
+ number of labels should match the number of unique communities.
+ Default: None
+ {x,y}labelrotation : float, optional
+ Angle of the rotation of the labels. Available only if `{x,y}labels`
+ provided. Default : xlabelrotation: 90, ylabelrotation: 0
+ square : bool, optional
+ Setting the matrix with equal aspect. Default: True
+ {x,y}ticklabels : list, optional
+ Incompatible with `{x,y}labels`. List of labels for each entry (not
+ community) in `data`. Default: None
+ cbar : bool, optional
+ Whether to plot colorbar. Default: True
+ mask_diagonal : bool, optional
+ Whether to mask the diagonal in the plotted heatmap. Default: True
+ kwargs : key-value mapping
+ Keyword arguments for `plt.pcolormesh()`
+
+ Returns
+ -------
+ ax : matplotlib.axes.Axes
+ Axis object containing plot
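+
+    Examples
+    --------
+    A minimal sketch using random data and arbitrary community labels:
+
+    >>> import numpy as np
+    >>> from netneurotools import plotting
+    >>> rng = np.random.default_rng(1234)
+    >>> data = rng.random((100, 100))
+    >>> comms = rng.choice(4, size=(100,))
+    >>> ax = plotting.plot_mod_heatmap(data, comms)  # doctest: +SKIP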
+ """
+ for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]):
+ if t is not None and label is not None:
+ raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels')
+
+ # get indices for sorting consensus
+ if inds is None:
+ inds = _sort_communities(data, communities)
+
+ if ax is None:
+ _, ax = plt.subplots(1, 1, figsize=figsize)
+
+ # plot data re-ordered based on community and node strength
+ if mask_diagonal:
+ plot_data = np.ma.masked_where(np.eye(len(data)),
+ data[np.ix_(inds, inds)])
+ else:
+ plot_data = data[np.ix_(inds, inds)]
+
+ coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs)
+ ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0]))
+
+ # set equal aspect
+ if square:
+ ax.set_aspect('equal')
+
+ for side in ['top', 'right', 'left', 'bottom']:
+ ax.spines[side].set_visible(False)
+
+ # invert the y-axis so it looks "as expected"
+ ax.invert_yaxis()
+
+ # plot the colorbar
+ if cbar:
+ cb = ax.figure.colorbar(coll)
+ if kwargs.get('rasterized', False):
+ cb.solids.set_rasterized(True)
+
+ # draw borders around communities
+ bounds = _grid_communities(communities)
+ bounds[0] += 0.2
+ bounds[-1] -= 0.2
+ for n, edge in enumerate(np.diff(bounds)):
+ ax.add_patch(mpatches.Rectangle((bounds[n], bounds[n]),
+ edge, edge, fill=False, linewidth=2,
+ edgecolor=edgecolor))
+
+ if xlabels is not None or ylabels is not None:
+ # find the tick locations
+ initloc = _grid_communities(communities)
+ tickloc = []
+ for loc in range(len(initloc) - 1):
+ tickloc.append(np.mean((initloc[loc], initloc[loc + 1])))
+
+ if xlabels is not None:
+ # make sure number of labels match the number of ticks
+ if len(tickloc) != len(xlabels):
+                raise ValueError('Number of labels does not match the number '
+                                 'of unique communities.')
+ else:
+ ax.set_xticks(tickloc)
+ ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation)
+ ax.tick_params(left=False, bottom=False)
+ if ylabels is not None:
+ # make sure number of labels match the number of ticks
+ if len(tickloc) != len(ylabels):
+                raise ValueError('Number of labels does not match the number '
+                                 'of unique communities.')
+ else:
+ ax.set_yticks(tickloc)
+ ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation)
+ ax.tick_params(left=False, bottom=False)
+
+ if xticklabels is not None:
+ labels_ind = [xticklabels[i] for i in inds]
+ ax.set_xticks(np.arange(len(labels_ind)) + 0.5)
+ ax.set_xticklabels(labels_ind, rotation=90)
+ if yticklabels is not None:
+ labels_ind = [yticklabels[i] for i in inds]
+ ax.set_yticks(np.arange(len(labels_ind)) + 0.5)
+ ax.set_yticklabels(labels_ind)
+
+ return ax
+
+
+def plot_point_brain(data, coords, views=None, views_orientation='vertical',
+ views_size=(4, 2.4), cbar=False, robust=True, size=50,
+ **kwargs):
+ """
+ Plot `data` as a cloud of points in 3D space based on specified `coords`.
+
+ Parameters
+ ----------
+ data : (N,) array_like
+ Data for an `N` node parcellation; determines color of points
+ coords : (N, 3) array_like
+ x, y, z coordinates for `N` node parcellation
+ views : list, optional
+ List specifying which views to use. Can be any of {'sagittal', 'sag',
+ 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal'
+ and 'axial'. Default: None
+ views_orientation: str, optional
+ Orientation of the views. Can be either 'vertical' or 'horizontal'.
+ Default: 'vertical'.
+ views_size : tuple, optional
+ Figure size of each view. Default: (4, 2.4)
+ cbar : bool, optional
+ Whether to also show colorbar. Default: False
+ robust : bool, optional
+        Whether to use robust calculation of `vmin` and `vmax` for color scale.
+        Default: True
+ size : int, optional
+ Size of points on plot. Default: 50
+ **kwargs
+ Key-value pairs passed to `matplotlib.axes.Axis.scatter`
+
+ Returns
+ -------
+ fig : :class:`matplotlib.figure.Figure`
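+
+    Examples
+    --------
+    A minimal sketch using random values at random coordinates:
+
+    >>> import numpy as np
+    >>> from netneurotools import plotting
+    >>> rng = np.random.default_rng(1234)
+    >>> data = rng.random(100)
+    >>> coords = rng.random((100, 3)) * 100
+    >>> fig = plotting.plot_point_brain(
+    ...     data, coords, views=['sag', 'ax'])  # doctest: +SKIP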
+ """
+ _views = dict(sagittal=(0, 180), sag=(0, 180),
+ axial=(90, 180), ax=(90, 180),
+ coronal=(0, 90), cor=(0, 90))
+
+ x, y, z = coords[:, 0], coords[:, 1], coords[:, 2]
+
+ if views is None:
+ views = [_views[f] for f in ['sagittal', 'axial']]
+ else:
+ if not isinstance(views, Iterable) or isinstance(views, str):
+ views = [views]
+ views = [_views[f] for f in views]
+
+ if views_orientation == 'vertical':
+ ncols, nrows = 1, len(views)
+ elif views_orientation == 'horizontal':
+ ncols, nrows = len(views), 1
+ figsize = (ncols * views_size[0], nrows * views_size[1])
+
+ # create figure and axes (3d projections)
+ fig, axes = plt.subplots(ncols=ncols, nrows=nrows,
+ figsize=figsize,
+ subplot_kw=dict(projection='3d'))
+
+ opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis')
+ if robust:
+ vmin, vmax = np.percentile(data, [2.5, 97.5])
+ opts.update(dict(vmin=vmin, vmax=vmax))
+ opts.update(kwargs)
+
+    # iterate through the requested views and plot, rotating as needed
+ for n, view in enumerate(views):
+ # if only one view then axes is not a list!
+ ax = axes[n] if len(views) > 1 else axes
+ # make the actual scatterplot and update the view / aspect ratios
+ col = ax.scatter(x, y, z, c=data, s=size, **opts)
+ ax.view_init(*view)
+ ax.axis('off')
+ scaling = np.array([ax.get_xlim(),
+ ax.get_ylim(),
+ ax.get_zlim()])
+ ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0]))
+
+ fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0)
+
+ # add colorbar to axes
+ if cbar:
+ cbar = fig.colorbar(col, ax=axes.flatten(),
+ drawedges=False, shrink=0.7)
+ cbar.outline.set_linewidth(0)
+
+ return fig
+
+
+def plot_simple_brain():
+ """Plot a simple brain using matplotlib."""
+ # https://github.com/dutchconnectomelab/Simple-Brain-Plot
+ pass
diff --git a/netneurotools/plotting.py b/netneurotools/plotting/pysurfer_plotters.py
similarity index 61%
rename from netneurotools/plotting.py
rename to netneurotools/plotting/pysurfer_plotters.py
index 3886547..50e830b 100644
--- a/netneurotools/plotting.py
+++ b/netneurotools/plotting/pysurfer_plotters.py
@@ -1,209 +1,10 @@
-# -*- coding: utf-8 -*-
-"""Functions for making pretty plots and whatnot."""
+"""Functions for pysurfer-based plotting."""
import os
-from typing import Iterable
-
-import matplotlib.patches as patches
-import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D # noqa
-import nibabel as nib
import numpy as np
+import nibabel as nib
-from .freesurfer import FSIGNORE, _decode_list
-
-
-def _grid_communities(communities):
- """
- Generate boundaries of `communities`.
-
- Parameters
- ----------
- communities : array_like
- Community assignment vector
-
- Returns
- -------
- bounds : list
- Boundaries of communities
- """
- communities = np.asarray(communities)
- if 0 in communities:
- communities = communities + 1
-
- comm = communities[np.argsort(communities)]
- bounds = []
- for i in np.unique(comm):
- ind = np.where(comm == i)
- if len(ind) > 0:
- bounds.append(np.min(ind))
-
- bounds.append(len(communities))
-
- return bounds
-
-
-def sort_communities(consensus, communities):
- """
- Sort `communities` in `consensus` according to strength.
-
- Parameters
- ----------
- consensus : array_like
- Correlation matrix
- communities : array_like
- Community assignments for `consensus`
-
- Returns
- -------
- inds : np.ndarray
- Index array for sorting `consensus`
- """
- communities = np.asarray(communities)
- if 0 in communities:
- communities = communities + 1
-
- bounds = _grid_communities(communities)
- inds = np.argsort(communities)
-
- for n, f in enumerate(bounds[:-1]):
- i = inds[f:bounds[n + 1]]
- cco = i[consensus[np.ix_(i, i)].mean(axis=1).argsort()[::-1]]
- inds[f:bounds[n + 1]] = cco
-
- return inds
-
-
-def plot_mod_heatmap(data, communities, *, inds=None, edgecolor='black',
- ax=None, figsize=(6.4, 4.8), xlabels=None, ylabels=None,
- xlabelrotation=90, ylabelrotation=0, cbar=True,
- square=True, xticklabels=None, yticklabels=None,
- mask_diagonal=True, **kwargs):
- """
- Plot `data` as heatmap with borders drawn around `communities`.
-
- Parameters
- ----------
- data : (N, N) array_like
- Correlation matrix
- communities : (N,) array_like
- Community assignments for `data`
- inds : (N,) array_like, optional
- Index array for sorting `data` within `communities`. If None, these
- will be generated from `data`. Default: None
- edgecolor : str, optional
- Color for lines demarcating community boundaries. Default: 'black'
- ax : matplotlib.axes.Axes, optional
- Axis on which to plot the heatmap. If none provided, a new figure and
- axis will be created. Default: None
- figsize : tuple, optional
- Size of figure to create if `ax` is not provided. Default: (20, 20)
- {x,y}labels : list, optional
- List of labels on {x,y}-axis for each community in `communities`. The
- number of labels should match the number of unique communities.
- Default: None
- {x,y}labelrotation : float, optional
- Angle of the rotation of the labels. Available only if `{x,y}labels`
- provided. Default : xlabelrotation: 90, ylabelrotation: 0
- square : bool, optional
- Setting the matrix with equal aspect. Default: True
- {x,y}ticklabels : list, optional
- Incompatible with `{x,y}labels`. List of labels for each entry (not
- community) in `data`. Default: None
- cbar : bool, optional
- Whether to plot colorbar. Default: True
- mask_diagonal : bool, optional
- Whether to mask the diagonal in the plotted heatmap. Default: True
- kwargs : key-value mapping
- Keyword arguments for `plt.pcolormesh()`
-
- Returns
- -------
- ax : matplotlib.axes.Axes
- Axis object containing plot
- """
- for t, label in zip([xticklabels, yticklabels], [xlabels, ylabels]):
- if t is not None and label is not None:
- raise ValueError('Cannot set both {x,y}labels and {x,y}ticklabels')
-
- # get indices for sorting consensus
- if inds is None:
- inds = sort_communities(data, communities)
-
- if ax is None:
- fig, ax = plt.subplots(1, 1, figsize=figsize)
-
- # plot data re-ordered based on community and node strength
- if mask_diagonal:
- plot_data = np.ma.masked_where(np.eye(len(data)),
- data[np.ix_(inds, inds)])
- else:
- plot_data = data[np.ix_(inds, inds)]
-
- coll = ax.pcolormesh(plot_data, edgecolor='none', **kwargs)
- ax.set(xlim=(0, plot_data.shape[1]), ylim=(0, plot_data.shape[0]))
-
- # set equal aspect
- if square:
- ax.set_aspect('equal')
-
- for side in ['top', 'right', 'left', 'bottom']:
- ax.spines[side].set_visible(False)
-
- # invert the y-axis so it looks "as expected"
- ax.invert_yaxis()
-
- # plot the colorbar
- if cbar:
- cb = ax.figure.colorbar(coll)
- if kwargs.get('rasterized', False):
- cb.solids.set_rasterized(True)
-
- # draw borders around communities
- bounds = _grid_communities(communities)
- bounds[0] += 0.2
- bounds[-1] -= 0.2
- for n, edge in enumerate(np.diff(bounds)):
- ax.add_patch(patches.Rectangle((bounds[n], bounds[n]),
- edge, edge, fill=False, linewidth=2,
- edgecolor=edgecolor))
-
- if xlabels is not None or ylabels is not None:
- # find the tick locations
- initloc = _grid_communities(communities)
- tickloc = []
- for loc in range(len(initloc) - 1):
- tickloc.append(np.mean((initloc[loc], initloc[loc + 1])))
-
- if xlabels is not None:
- # make sure number of labels match the number of ticks
- if len(tickloc) != len(xlabels):
- raise ValueError('Number of labels do not match the number of '
- 'unique communities.')
- else:
- ax.set_xticks(tickloc)
- ax.set_xticklabels(labels=xlabels, rotation=xlabelrotation)
- ax.tick_params(left=False, bottom=False)
- if ylabels is not None:
- # make sure number of labels match the number of ticks
- if len(tickloc) != len(ylabels):
- raise ValueError('Number of labels do not match the number of '
- 'unique communities.')
- else:
- ax.set_yticks(tickloc)
- ax.set_yticklabels(labels=ylabels, rotation=ylabelrotation)
- ax.tick_params(left=False, bottom=False)
-
- if xticklabels is not None:
- labels_ind = [xticklabels[i] for i in inds]
- ax.set_xticks(np.arange(len(labels_ind)) + 0.5)
- ax.set_xticklabels(labels_ind, rotation=90)
- if yticklabels is not None:
- labels_ind = [yticklabels[i] for i in inds]
- ax.set_yticks(np.arange(len(labels_ind)) + 0.5)
- ax.set_yticklabels(labels_ind)
-
- return ax
+from ..datasets import FREESURFER_IGNORE, _get_freesurfer_subjid
def plot_conte69(data, lhlabel, rhlabel, surf='midthickness',
@@ -319,7 +120,7 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69',
scene : mayavi.Scene
Scene object containing plot
"""
- from .datasets import fetch_conte69, fetch_yerkes19
+ from ..datasets import fetch_conte69, fetch_yerkes19
try:
from mayavi import mlab
except ImportError:
@@ -388,44 +189,6 @@ def plot_fslr(data, lhlabel, rhlabel, surf_atlas='conte69',
return lhplot, rhplot
-def _get_fs_subjid(subject_id, subjects_dir=None):
- """
- Get fsaverage version `subject_id`, fetching if required.
-
- Parameters
- ----------
- subject_id : str
- FreeSurfer subject ID
- subjects_dir : str, optional
- Path to FreeSurfer subject directory. If not set, will inherit from
- the environmental variable $SUBJECTS_DIR. Default: None
-
- Returns
- -------
- subject_id : str
- FreeSurfer subject ID
- subjects_dir : str
- Path to subject directory with `subject_id`
- """
- from netneurotools.utils import check_fs_subjid
-
- # check for FreeSurfer install w/fsaverage; otherwise, fetch required
- try:
- subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir)
- except FileNotFoundError:
- if 'fsaverage' not in subject_id:
- raise ValueError('Provided subject {} does not exist in provided '
- 'subjects_dir {}'
- .format(subject_id, subjects_dir)) from None
- from netneurotools.datasets import fetch_fsaverage
- from netneurotools.datasets.utils import _get_data_dir
- fetch_fsaverage(subject_id)
- subjects_dir = os.path.join(_get_data_dir(), 'tpl-fsaverage')
- subject_id, subjects_dir = check_fs_subjid(subject_id, subjects_dir)
-
- return subject_id, subjects_dir
-
-
def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None,
noplot=None, subject_id='fsaverage', subjects_dir=None,
vmin=None, vmax=None, **kwargs):
@@ -503,7 +266,11 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None,
... rhannot=schaefer.rh) # doctest: +SKIP
"""
- subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir)
+ def _decode_list(vals):
+ """List decoder."""
+ return [val.decode() if hasattr(val, 'decode') else val for val in vals]
+
+ subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir)
# cast data to float (required for NaNs)
data = np.asarray(data, dtype='float')
@@ -521,7 +288,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None,
vmax = np.nanpercentile(data, 97.5)
# parcels that should not be included in parcellation
- drop = FSIGNORE.copy()
+ drop = FREESURFER_IGNORE.copy()
if noplot is not None:
if isinstance(noplot, str):
noplot = [noplot]
@@ -533,7 +300,7 @@ def plot_fsaverage(data, *, lhannot, rhannot, order='lr', mask=None,
# loads annotation data for hemisphere, including vertex `labels`!
if not annot.startswith(os.path.abspath(os.sep)):
annot = os.path.join(subjects_dir, subject_id, 'label', annot)
- labels, ctab, names = nib.freesurfer.read_annot(annot)
+ labels, _, names = nib.freesurfer.read_annot(annot)
names = _decode_list(names)
# get appropriate data, accounting for hemispheric asymmetry
@@ -637,7 +404,7 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat',
raise ImportError('Cannot use plot_fsaverage() if pysurfer is not '
'installed. Please install pysurfer and try again.') from None
- subject_id, subjects_dir = _get_fs_subjid(subject_id, subjects_dir)
+ subject_id, subjects_dir = _get_freesurfer_subjid(subject_id, subjects_dir)
# cast data to float (required for NaNs)
data = np.asarray(data, dtype='float')
@@ -712,91 +479,3 @@ def plot_fsvertex(data, *, order='lr', surf='pial', views='lat',
surf[n].render()
return brain
-
-
-def plot_point_brain(data, coords, views=None, views_orientation='vertical',
- views_size=(4, 2.4), cbar=False, robust=True, size=50,
- **kwargs):
- """
- Plot `data` as a cloud of points in 3D space based on specified `coords`.
-
- Parameters
- ----------
- data : (N,) array_like
- Data for an `N` node parcellation; determines color of points
- coords : (N, 3) array_like
- x, y, z coordinates for `N` node parcellation
- views : list, optional
- List specifying which views to use. Can be any of {'sagittal', 'sag',
- 'coronal', 'cor', 'axial', 'ax'}. If not specified will use 'sagittal'
- and 'axial'. Default: None
- views_orientation: str, optional
- Orientation of the views. Can be either 'vertical' or 'horizontal'.
- Default: 'vertical'.
- views_size : tuple, optional
- Figure size of each view. Default: (4, 2.4)
- cbar : bool, optional
- Whether to also show colorbar. Default: False
- robust : bool, optional
- Whether to use robust calculation of `vmin` and `vmax` for color scale.
- size : int, optional
- Size of points on plot. Default: 50
- **kwargs
- Key-value pairs passed to `matplotlib.axes.Axis.scatter`
-
- Returns
- -------
- fig : :class:`matplotlib.figure.Figure`
- """
- _views = dict(sagittal=(0, 180), sag=(0, 180),
- axial=(90, 180), ax=(90, 180),
- coronal=(0, 90), cor=(0, 90))
-
- x, y, z = coords[:, 0], coords[:, 1], coords[:, 2]
-
- if views is None:
- views = [_views[f] for f in ['sagittal', 'axial']]
- else:
- if not isinstance(views, Iterable) or isinstance(views, str):
- views = [views]
- views = [_views[f] for f in views]
-
- if views_orientation == 'vertical':
- ncols, nrows = 1, len(views)
- elif views_orientation == 'horizontal':
- ncols, nrows = len(views), 1
- figsize = (ncols * views_size[0], nrows * views_size[1])
-
- # create figure and axes (3d projections)
- fig, axes = plt.subplots(ncols=ncols, nrows=nrows,
- figsize=figsize,
- subplot_kw=dict(projection='3d'))
-
- opts = dict(linewidth=0.5, edgecolor='gray', cmap='viridis')
- if robust:
- vmin, vmax = np.percentile(data, [2.5, 97.5])
- opts.update(dict(vmin=vmin, vmax=vmax))
- opts.update(kwargs)
-
- # iterate through saggital/axial views and plot, rotating as needed
- for n, view in enumerate(views):
- # if only one view then axes is not a list!
- ax = axes[n] if len(views) > 1 else axes
- # make the actual scatterplot and update the view / aspect ratios
- col = ax.scatter(x, y, z, c=data, s=size, **opts)
- ax.view_init(*view)
- ax.axis('off')
- scaling = np.array([ax.get_xlim(),
- ax.get_ylim(),
- ax.get_zlim()])
- ax.set_box_aspect(tuple(scaling[:, 1] - scaling[:, 0]))
-
- fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0, wspace=0)
-
- # add colorbar to axes
- if cbar:
- cbar = fig.colorbar(col, ax=axes.flatten(),
- drawedges=False, shrink=0.7)
- cbar.outline.set_linewidth(0)
-
- return fig
diff --git a/netneurotools/plotting/pyvista_plotters.py b/netneurotools/plotting/pyvista_plotters.py
new file mode 100644
index 0000000..e711a5d
--- /dev/null
+++ b/netneurotools/plotting/pyvista_plotters.py
@@ -0,0 +1,423 @@
+"""Functions for pyvista-based plotting."""
+
+from pathlib import Path
+
+import nibabel as nib
+import numpy as np
+
+try:
+ import pyvista as pv
+except ImportError:
+ _has_pyvista = False
+else:
+ _has_pyvista = True
+
+from netneurotools.datasets import (
+ fetch_civet_curated,
+ fetch_fsaverage_curated,
+ fetch_fslr_curated,
+)
+
+
+def _pv_fetch_template(template, surf="inflated", data_dir=None, verbose=0):
+ if template in ["fsaverage", "fsaverage6", "fsaverage5", "fsaverage4"]:
+ _fetch_curr_tpl = fetch_fsaverage_curated
+ elif template in ["fslr4k", "fslr8k", "fslr32k", "fslr164k"]:
+ _fetch_curr_tpl = fetch_fslr_curated
+ elif template in ["civet41k", "civet164k"]:
+ _fetch_curr_tpl = fetch_civet_curated
+ else:
+ raise ValueError(f"Unknown template: {template}")
+
+ curr_tpl_surf = _fetch_curr_tpl(
+ version=template, data_dir=data_dir, verbose=verbose
+ )[surf]
+
+ return curr_tpl_surf
+
+
+def _pv_make_surface(template, surf="inflated", hemi=None, data_dir=None, verbose=0):
+ curr_tpl_surf = _pv_fetch_template(
+ template=template, surf=surf, data_dir=data_dir, verbose=verbose
+ )
+
+ def _gifti_to_polydata(gifti_file):
+ vertices, faces = nib.load(gifti_file).agg_data()
+ return pv.PolyData(
+ vertices, np.c_[np.ones((faces.shape[0],), dtype=int) * 3, faces]
+ )
+
+ if hemi == "L":
+ return _gifti_to_polydata(curr_tpl_surf.L)
+ elif hemi == "R":
+ return _gifti_to_polydata(curr_tpl_surf.R)
+ else:
+ return (
+ _gifti_to_polydata(curr_tpl_surf.L),
+ _gifti_to_polydata(curr_tpl_surf.R),
+ )
+
+
+def _mask_medial_wall(data, template, hemi=None, data_dir=None, verbose=0):
+ curr_medial = _pv_fetch_template(
+ template=template, surf="medial", data_dir=data_dir, verbose=verbose
+ )
+ if isinstance(data, tuple):
+ curr_medial_data = (
+ nib.load(curr_medial.L).agg_data(),
+ nib.load(curr_medial.R).agg_data(),
+ )
+ ret_L = data[0].copy()
+ ret_R = data[1].copy()
+ ret_L[np.where(1 - curr_medial_data[0])] = np.nan
+ ret_R[np.where(1 - curr_medial_data[1])] = np.nan
+ ret = (ret_L, ret_R)
+ else:
+ if hemi == "L":
+ curr_medial_data = nib.load(curr_medial.L).agg_data()
+ elif hemi == "R":
+ curr_medial_data = nib.load(curr_medial.R).agg_data()
+ else:
+            # per-hemisphere medial-wall masks are 1-D, so stack along axis 0
+            curr_medial_data = np.concatenate(
+                [
+                    nib.load(curr_medial.L).agg_data(),
+                    nib.load(curr_medial.R).agg_data(),
+                ],
+                axis=0,
+            )
+ ret = data.copy()
+ ret[np.where(1 - curr_medial_data)] = np.nan
+ return ret
+
+
+def pv_plot_surface(
+ vertex_data,
+ template,
+ surf="inflated",
+ hemi="both",
+ layout="default",
+ mask_medial=True,
+ cmap="viridis",
+ clim=None,
+ zoom_ratio=1.0,
+ show_colorbar=True,
+ cbar_title=None,
+ show_plot=True,
+ jupyter_backend="html",
+ lighting_style="default",
+ save_fig=None,
+ plotter_kws=None,
+ mesh_kws=None,
+ cbar_kws=None,
+ silhouette_kws=None,
+ data_dir=None,
+ verbose=0,
+):
+ """
+ Plot surface data using PyVista.
+
+ Parameters
+ ----------
+ vertex_data : array-like or tuple of array-like
+ Data array(s) to be plotted on the surface. If `hemi` is "both", this
+ should be a tuple of two arrays. Otherwise, a single array.
+ template : str
+ Template to use for plotting. Options include 'fsaverage', 'fsaverage6',
+ 'fsaverage5', 'fsaverage4', 'fslr4k', 'fslr8k', 'fslr32k', 'fslr164k',
+ 'civet41k', 'civet164k'.
+ surf : str, optional
+ Surface to plot. Default is 'inflated'.
+ hemi : str, optional
+ Hemisphere to plot. Options include 'L', 'R', 'both'. Default is 'both'.
+ layout : str, optional
+ Layout of the plot. Options include 'default', 'single', 'row', 'column'.
+ Default is 'default'.
+ mask_medial : bool, optional
+ Mask medial wall. Default is True.
+ cmap : str, optional
+ Colormap to use. Default is 'viridis'.
+ clim : tuple, optional
+ Colorbar limits. If None, will be set to 2.5th and 97.5th percentiles.
+ Default is None.
+ zoom_ratio : float, optional
+ Zoom ratio for the camera. Default is 1.0.
+ show_colorbar : bool, optional
+ Whether to show the colorbar. Default is True.
+ cbar_title : str, optional
+ Title for the colorbar. Default is None.
+ show_plot : bool, optional
+ Whether to show the plot. Default is True.
+ jupyter_backend : str, optional
+ Jupyter backend to use. See `PyVista documentation
+ `_
+ for more details. Default is 'html'.
+ lighting_style : str, optional
+ Lighting style to use. Options include 'default', 'lightkit', 'threelights',
+ 'silhouette', 'metallic', 'plastic', 'shiny', 'glossy', 'ambient', 'plain'.
+ Default is 'default'.
+ save_fig : str or Path, optional
+        Path (including the file name) to save the figure. Default is None.
+
+ Returns
+ -------
+ pl : PyVista.Plotter
+ PyVista plotter object.
+
+ Other Parameters
+ ----------------
+ plotter_kws : dict, optional
+ Additional keyword arguments to pass to the `PyVista plotter
+ `_.
+ Default is None.
+ mesh_kws : dict, optional
+ Additional keyword arguments to pass to the `PyVista mesh
+ `_.
+ Default is None.
+ cbar_kws : dict, optional
+ Additional keyword arguments to pass to the `PyVista colorbar
+ `_.
+ Default is None.
+ silhouette_kws : dict, optional
+ Additional keyword arguments to pass to the `PyVista silhouette
+ `_.
+ Default is None.
+ data_dir : str or Path, optional
+ Path to use as data directory. If not specified, will check for
+ environmental variable 'NNT_DATA'; if that is not set, will use
+ `~/nnt-data` instead. Default: None
+ verbose : int, optional
+ Modifies verbosity of download, where higher numbers mean more updates.
+ Default: 0
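+
+    Examples
+    --------
+    A minimal sketch (assuming PyVista is installed and the curated
+    fsaverage5 surfaces can be fetched; 10242 vertices per hemisphere):
+
+    >>> import numpy as np
+    >>> from netneurotools import plotting
+    >>> rng = np.random.default_rng(1234)
+    >>> vertex_data = (rng.random(10242), rng.random(10242))
+    >>> pl = plotting.pv_plot_surface(
+    ...     vertex_data, 'fsaverage5', show_plot=False)  # doctest: +SKIP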
+ """
+ if not _has_pyvista:
+ raise ImportError("PyVista is required for this function")
+
+ # setup data
+ # could be a single array or a tuple of two arrays
+ if hemi == "both": # both hemispheres
+ surf_pair = _pv_make_surface(
+ template=template, surf=surf, data_dir=data_dir, verbose=verbose
+ )
+ if len(vertex_data) == 2: # tuple or list of two arrays
+ # check if data length matches number of vertices
+ if not all(len(vertex_data[i]) == surf_pair[i].n_points for i in range(2)):
+ raise ValueError("Data length mismatch")
+ else: # combined array
+ # check if data length matches number of vertices
+ if len(vertex_data) != surf_pair[0].n_points + surf_pair[1].n_points:
+ raise ValueError("Data length mismatch")
+ # convert long array to tuple
+ vertex_data = (
+ vertex_data[: surf_pair[0].n_points],
+ vertex_data[surf_pair[0].n_points :],
+ )
+
+ if mask_medial:
+ vertex_data = _mask_medial_wall(
+ vertex_data, template, hemi=None, data_dir=data_dir, verbose=verbose
+ )
+ surf_pair[0].point_data["vertex_data"] = vertex_data[0]
+ surf_pair[1].point_data["vertex_data"] = vertex_data[1]
+ elif hemi in ["L", "R"]:
+ # single hemisphere
+ surf = _pv_make_surface(
+ template=template, surf=surf, hemi=hemi, data_dir=data_dir, verbose=verbose
+ )
+ if len(vertex_data) != surf.n_points:
+ raise ValueError("Data length mismatch")
+
+ if mask_medial:
+ vertex_data = _mask_medial_wall(
+ vertex_data, template, hemi=hemi, data_dir=data_dir, verbose=verbose
+ )
+ surf.point_data["vertex_data"] = vertex_data
+ else:
+ raise ValueError(f"Unknown hemi: {hemi}")
+
+ # setup plotter shape based on layout
+ if layout == "default":
+ if hemi == "both":
+ plotter_shape = (2, 2)
+ else:
+ plotter_shape = (1, 2)
+ elif layout == "single":
+ plotter_shape = (1, 1)
+ elif layout == "row":
+ if hemi == "both":
+ plotter_shape = (1, 4)
+ else:
+ plotter_shape = (2, 1)
+ elif layout == "column":
+ if hemi == "both":
+ plotter_shape = (4, 1)
+ else:
+ plotter_shape = (1, 2)
+ else:
+ raise ValueError(f"Unknown layout: {layout}")
+
+ # setup color limits
+ if clim is not None:
+ _vmin, _vmax = clim
+ else:
+ if len(vertex_data) == 2:
+ _values = np.c_[vertex_data[0], vertex_data[1]]
+ else:
+ _values = vertex_data
+ _vmin, _vmax = np.nanpercentile(_values, [2.5, 97.5])
+
+ # default plotter settings
+ plotter_settings = dict(
+ window_size=(350 * plotter_shape[1], 250 * plotter_shape[0]),
+ border=False,
+ lighting="three lights",
+ )
+ # notebook plotting
+ if jupyter_backend is not None:
+ plotter_settings.update(dict(notebook=True, off_screen=True))
+
+ # default mesh settings
+ mesh_settings = dict(
+ scalars="vertex_data",
+ smooth_shading=True,
+ cmap=cmap,
+ clim=(_vmin, _vmax),
+ show_scalar_bar=False,
+ )
+
+ # lighting styles
+ lighting_style_keys = ["ambient", "diffuse", "specular", "specular_power"]
+ lighting_style_presets = {
+ "metallic": [0.1, 0.3, 1.0, 10],
+ "plastic": [0.3, 0.4, 0.3, 5],
+ "shiny": [0.2, 0.6, 0.8, 50],
+ "glossy": [0.1, 0.7, 0.9, 90],
+ "ambient": [0.8, 0.1, 0.0, 1],
+ "plain": [0.1, 1.0, 0.05, 5],
+ }
+
+ if lighting_style in ["default", "lightkit"]:
+ mesh_settings["lighting"] = "light kit"
+ elif lighting_style == "threelights":
+ mesh_settings["lighting"] = "three lights"
+ elif lighting_style == "silhouette":
+ mesh_settings["lighting"] = "light kit"
+ elif lighting_style in lighting_style_presets.keys():
+ mesh_settings.update(
+ {
+ k: v
+ for k, v in zip(
+ lighting_style_keys, lighting_style_presets[lighting_style]
+ )
+ }
+ )
+ mesh_settings["lighting"] = "light kit"
+ else:
+ raise ValueError(f"Unknown lighting style: {lighting_style}")
+
+ # default colorbar settings
+ cbar_settings = dict(
+ title=cbar_title,
+ n_labels=2,
+ label_font_size=10,
+ title_font_size=12,
+ font_family="arial",
+ height=0.15,
+ )
+
+ # default silhouette settings
+ silhouette_settings = dict(color="white", feature_angle=40)
+
+ # update if provided with custom settings
+ if plotter_kws is not None:
+ plotter_settings.update(plotter_kws)
+ if mesh_kws is not None:
+ mesh_settings.update(mesh_kws)
+ if cbar_kws is not None:
+ cbar_settings.update(cbar_kws)
+ if silhouette_kws is not None:
+ silhouette_settings.update(silhouette_kws)
+
+ pl = pv.Plotter(shape=plotter_shape, **plotter_settings)
+
+ if layout == "single": # single panel (1, 1)
+ if hemi == "both":
+ _surf = surf_pair[0].rotate_z(180)
+ pl.subplot(0, 0)
+ pl.add_mesh(_surf, **mesh_settings)
+ pl.camera_position = "yz"
+ pl.zoom_camera(zoom_ratio)
+ if lighting_style == "silhouette":
+ pl.add_silhouette(_surf, **silhouette_settings)
+ else: # multiple panels
+ if hemi == "both": # both hemi, 4 panels
+ if layout == "default":
+ _pos = [(0, 0), (0, 1), (1, 0), (1, 1)]
+ elif layout == "row":
+ _pos = [(0, 0), (0, 2), (0, 1), (0, 3)]
+ elif layout == "column":
+ _pos = [(0, 0), (2, 0), (1, 0), (3, 0)]
+ else:
+ raise ValueError(f"Unknown layout: {layout}")
+ _surf_list = [
+ surf_pair[0].rotate_z(180),
+ surf_pair[1],
+ surf_pair[0],
+ surf_pair[1].rotate_z(180),
+ ]
+ for _xy, _surf in zip(_pos, _surf_list):
+ pl.subplot(*_xy)
+ pl.add_mesh(_surf, **mesh_settings)
+ pl.camera_position = "yz"
+ pl.zoom_camera(zoom_ratio)
+ if lighting_style == "silhouette":
+ pl.add_silhouette(_surf, **silhouette_settings)
+ else: # single hemi, 2 panels
+ if layout == "default":
+ _pos = [(0, 0), (0, 1)]
+ elif layout == "row":
+ _pos = [(0, 0), (0, 1)]
+ elif layout == "column":
+ _pos = [(0, 0), (1, 0)]
+ else:
+ raise ValueError(f"Unknown layout: {layout}")
+
+ if hemi == "L":
+ _surf_list = [surf.rotate_z(180), surf]
+ else:
+ _surf_list = [surf, surf.rotate_z(180)]
+
+ for _xy, _surf in zip(_pos, _surf_list):
+ pl.subplot(*_xy)
+ pl.add_mesh(_surf, **mesh_settings)
+ pl.camera_position = "yz"
+ pl.zoom_camera(zoom_ratio)
+ if lighting_style == "silhouette":
+ pl.add_silhouette(_surf, **silhouette_settings)
+
+ if show_colorbar:
+ cbar = pl.add_scalar_bar(**cbar_settings)
+ cbar.GetLabelTextProperty().SetItalic(True)
+
+ # setting the headlight (by default applied to all scenes)
+ if lighting_style in ["default", "silhouette"] + list(
+ lighting_style_presets.keys()
+ ):
+ light = pv.Light(light_type="headlight", intensity=0.2)
+ pl.add_light(light)
+
+ if show_plot:
+ if jupyter_backend is not None:
+ pl.show(jupyter_backend=jupyter_backend)
+ else:
+ pl.show()
+
+ if save_fig is not None:
+ _fname = Path(save_fig)
+ if _fname.suffix in [".png", ".jpeg", ".jpg", ".bmp", ".tif", ".tiff"]:
+ pl.screenshot(_fname, return_img=False)
+ elif _fname.suffix in [".svg", ".eps", ".ps", ".pdf", ".tex"]:
+ pl.save_graphic(_fname)
+ else:
+ raise ValueError(f"Unknown file format: {save_fig}")
+
+ return pl
diff --git a/netneurotools/plotting/tests/__init__.py b/netneurotools/plotting/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/plotting/tests/test_color_utils.py b/netneurotools/plotting/tests/test_color_utils.py
new file mode 100644
index 0000000..3bd55c1
--- /dev/null
+++ b/netneurotools/plotting/tests/test_color_utils.py
@@ -0,0 +1,10 @@
+"""For testing netneurotools.plotting.color_utils functionality."""
+
+
+def test_register_cmaps():
+    """Test that the custom colormaps are registered."""
+    import matplotlib
+
+    # importing the plotting subpackage should register the custom colormaps
+    from netneurotools import plotting  # noqa: F401
+
+    assert "justine" in matplotlib.colormaps
diff --git a/netneurotools/plotting/tests/test_mpl.py b/netneurotools/plotting/tests/test_mpl.py
new file mode 100644
index 0000000..1d7d79a
--- /dev/null
+++ b/netneurotools/plotting/tests/test_mpl.py
@@ -0,0 +1,38 @@
+"""For testing netneurotools.plotting.mpl_plotters functionality."""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from netneurotools import plotting
+
+
+def test_grid_communities():
+ """Test _grid_communities function."""
+ comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2])
+ # check that comms with / without 0 community label yields same output
+ assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10])
+ assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10])
+
+
+def test_sort_communities():
+ """Test sort_communities function."""
+ data = np.arange(9).reshape(3, 3)
+ comms = np.asarray([0, 0, 2])
+ # check that comms with / without 0 community label yields same output
+ assert np.allclose(plotting._sort_communities(data, comms), [1, 0, 2])
+ assert np.allclose(plotting._sort_communities(data, comms + 1), [1, 0, 2])
+
+
+def test_plot_mod_heatmap():
+ """Test plot_mod_heatmap function."""
+ data = np.random.rand(100, 100)
+ comms = np.random.choice(4, size=(100,))
+ ax = plotting.plot_mod_heatmap(data, comms)
+ assert isinstance(ax, plt.Axes)
+
+
+def test_plot_point_brain():
+ """Test plot_point_brain function."""
+ data = np.random.rand(100)
+ coords = np.random.rand(100, 3)
+ out = plotting.plot_point_brain(data, coords)
+ assert isinstance(out, plt.Figure)
diff --git a/netneurotools/plotting/tests/test_pysurfer.py b/netneurotools/plotting/tests/test_pysurfer.py
new file mode 100644
index 0000000..3133b3c
--- /dev/null
+++ b/netneurotools/plotting/tests/test_pysurfer.py
@@ -0,0 +1,28 @@
+"""For testing netneurotools.plotting.pysurfer_plotters functionality."""
+
+import pytest
+import numpy as np
+from netneurotools import datasets, plotting
+
+
+@pytest.mark.filterwarnings('ignore')
+def test_plot_fsvertex():
+ """Test plotting on a freesurfer vertex."""
+ surfer = pytest.importorskip('surfer')
+
+ data = np.random.rand(20484)
+ brain = plotting.plot_fsvertex(data, subject_id='fsaverage5',
+ offscreen=True)
+ assert isinstance(brain, surfer.Brain)
+
+
+@pytest.mark.filterwarnings('ignore')
+def test_plot_fsaverage():
+ """Test plotting on a freesurfer average brain."""
+ surfer = pytest.importorskip('surfer')
+
+ data = np.random.rand(68)
+ lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033']
+ brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot,
+ subject_id='fsaverage5', offscreen=True)
+ assert isinstance(brain, surfer.Brain)
diff --git a/netneurotools/plotting/tests/test_pyvista.py b/netneurotools/plotting/tests/test_pyvista.py
new file mode 100644
index 0000000..0b87931
--- /dev/null
+++ b/netneurotools/plotting/tests/test_pyvista.py
@@ -0,0 +1 @@
+"""For testing netneurotools.plotting.pyvista_plotters functionality."""
diff --git a/netneurotools/spatial/__init__.py b/netneurotools/spatial/__init__.py
new file mode 100644
index 0000000..a958655
--- /dev/null
+++ b/netneurotools/spatial/__init__.py
@@ -0,0 +1,12 @@
+"""Functions for handling spatial brain data."""
+
+
+from .spatial_stats import (
+ morans_i, local_morans_i
+)
+
+
+__all__ = [
+ # spatial_stats
+ 'morans_i', 'local_morans_i'
+]
diff --git a/netneurotools/spatial/gaussian_random_field.py b/netneurotools/spatial/gaussian_random_field.py
new file mode 100644
index 0000000..7b40565
--- /dev/null
+++ b/netneurotools/spatial/gaussian_random_field.py
@@ -0,0 +1 @@
+"""Functions for working with Gaussian random fields."""
diff --git a/netneurotools/spatial/spatial_stats.py b/netneurotools/spatial/spatial_stats.py
new file mode 100644
index 0000000..54baddf
--- /dev/null
+++ b/netneurotools/spatial/spatial_stats.py
@@ -0,0 +1,11 @@
+"""Functions for calculating spatial statistics."""
+
+
+def morans_i():
+ """Calculate Moran's I for spatial autocorrelation."""
+ pass
+
+
+def local_morans_i():
+ """Calculate local Moran's I for spatial autocorrelation."""
+ pass
diff --git a/netneurotools/spatial/tests/__init__.py b/netneurotools/spatial/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/spatial/tests/test_grf.py b/netneurotools/spatial/tests/test_grf.py
new file mode 100644
index 0000000..962bdb9
--- /dev/null
+++ b/netneurotools/spatial/tests/test_grf.py
@@ -0,0 +1 @@
+"""For testing netneurotools.spatial.gaussian_random_field functionality."""
diff --git a/netneurotools/spatial/tests/test_spatialstats.py b/netneurotools/spatial/tests/test_spatialstats.py
new file mode 100644
index 0000000..fa9c7f6
--- /dev/null
+++ b/netneurotools/spatial/tests/test_spatialstats.py
@@ -0,0 +1 @@
+"""For testing netneurotools.spatial.spatial_stats functionality."""
diff --git a/netneurotools/stats.py b/netneurotools/stats.py
deleted file mode 100644
index 9367cb7..0000000
--- a/netneurotools/stats.py
+++ /dev/null
@@ -1,1593 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Functions for performing statistical preprocessing and analyses."""
-
-import warnings
-
-import numpy as np
-from tqdm import tqdm
-from itertools import combinations
-from scipy import optimize, spatial, special, stats as sstats
-try: # scipy >= 1.8.0
- from scipy.stats._stats_py import _chk2_asarray
-except ImportError: # scipy < 1.8.0
- from scipy.stats.stats import _chk2_asarray
-from sklearn.utils.validation import check_random_state
-from sklearn.linear_model import LinearRegression
-from joblib import Parallel, delayed
-
-
-from . import utils
-from .metrics import _graph_laplacian
-
-try:
- from numba import njit
- use_numba = True
-except ImportError:
- use_numba = False
-
-
-def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True):
- """
- Return residuals of regression equation from `Y ~ X`.
-
- Parameters
- ----------
- X : (N[, R]) array_like
- Coefficient matrix of `R` variables for `N` subjects
- Y : (N[, F]) array_like
- Dependent variable matrix of `F` variables for `N` subjects
- Xc : (M[, R]) array_like, optional
- Coefficient matrix of `R` variables for `M` subjects. If not specified
- then `X` is used to estimate betas. Default: None
- Yc : (M[, F]) array_like, optional
- Dependent variable matrix of `F` variables for `M` subjects. If not
- specified then `Y` is used to estimate betas. Default: None
- normalize : bool, optional
- Whether to normalize (i.e., z-score) residuals. Will use residuals from
- `Yc ~ Xc` for generating mean and variance. Default: True
- add_intercept : bool, optional
- Whether to add intercept to `X` (and `Xc`, if provided). The intercept
- will not be removed, just used in beta estimation. Default: True
-
- Returns
- -------
- Yr : (N, F) numpy.ndarray
- Residuals of `Y ~ X`
-
- Notes
- -----
- If both `Xc` and `Yc` are provided, these are used to calculate betas which
- are then applied to `X` and `Y`.
- """
- if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)):
- raise ValueError('If processing against a comparative group, you must '
- 'provide both `Xc` and `Yc`.')
-
- X, Y = np.asarray(X), np.asarray(Y)
-
- if Yc is None:
- Xc, Yc = X.copy(), Y.copy()
- else:
- Xc, Yc = np.asarray(Xc), np.asarray(Yc)
-
- # add intercept to regressors if requested and calculate fit
- if add_intercept:
- X, Xc = utils.add_constant(X), utils.add_constant(Xc)
- betas, *rest = np.linalg.lstsq(Xc, Yc, rcond=None)
-
- # remove intercept from regressors and betas for calculation of residuals
- if add_intercept:
- betas = betas[:-1]
- X, Xc = X[:, :-1], Xc[:, :-1]
-
- # calculate residuals
- Yr = Y - (X @ betas)
- Ycr = Yc - (Xc @ betas)
-
- if normalize:
- Yr = sstats.zmap(Yr, compare=Ycr)
-
- return Yr
-
-
-def get_mad_outliers(data, thresh=3.5):
- """
- Determine which samples in `data` are outliers.
-
- Uses the Median Absolute Deviation for determining whether datapoints are
- outliers
-
- Parameters
- ----------
- data : (N, M) array_like
- Data array where `N` is samples and `M` is features
- thresh : float, optional
- Modified z-score. Observations with a modified z-score (based on the
- median absolute deviation) greater than this value will be classified
- as outliers. Default: 3.5
-
- Returns
- -------
- outliers : (N,) numpy.ndarray
- Boolean array where True indicates an outlier
-
- Notes
- -----
- Taken directly from https://stackoverflow.com/a/22357811
-
- References
- ----------
- Boris Iglewicz and David Hoaglin (1993), "Volume 16: How to Detect and
- Handle Outliers", The ASQC Basic References in Quality Control: Statistical
- Techniques, Edward F. Mykytka, Ph.D., Editor.
-
- Examples
- --------
- >>> from netneurotools import stats
-
- Create array with three samples of four features each:
-
- >>> X = np.array([[0, 5, 10, 15], [1, 4, 11, 16], [100, 100, 100, 100]])
- >>> X
- array([[ 0, 5, 10, 15],
- [ 1, 4, 11, 16],
- [100, 100, 100, 100]])
-
- Determine which sample(s) is outlier:
-
- >>> outliers = stats.get_mad_outliers(X)
- >>> outliers
- array([False, False, True])
- """
- data = np.asarray(data)
-
- if data.ndim == 1:
- data = np.vstack(data)
- if data.ndim > 2:
- data = data.reshape(len(data), -1)
-
- median = np.nanmedian(data, axis=0)
- diff = np.nansum((data - median)**2, axis=-1)
- diff = np.sqrt(diff)
- med_abs_deviation = np.median(diff)
-
- modified_z_score = 0.6745 * diff / med_abs_deviation
-
- return modified_z_score > thresh
-
-
-def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0):
- """
- Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`.
-
- Generates two-tailed p-value for hypothesis of whether `a` differs from
- `popmean` using permutation tests
-
- Parameters
- ----------
- a : array_like
- Sample observations
- popmean : float or array_like
- Expected valued in null hypothesis. If array_like then it must have the
- same shape as `a` excluding the `axis` dimension
- axis : int or None, optional
- Axis along which to compute test. If None, compute over the whole array
- of `a`. Default: 0
- n_perm : int, optional
- Number of permutations to assess. Unless `a` is very small along `axis`
- this will approximate a randomization test via Monte Carlo simulations.
- Default: 1000
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Set to None for "randomness".
- Default: 0
-
- Returns
- -------
- stat : float or numpy.ndarray
- Difference from `popmean`
- pvalue : float or numpy.ndarray
- Non-parametric p-value
-
- Notes
- -----
- Providing multiple values to `popmean` to run *independent* tests in
- parallel is not currently supported.
-
- The lowest p-value that can be returned by this function is equal to 1 /
- (`n_perm` + 1).
-
- Examples
- --------
- >>> from netneurotools import stats
- >>> np.random.seed(7654567) # set random seed for reproducible results
- >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2))
-
- Test if mean of random sample is equal to true mean, and different mean. We
- reject the null hypothesis in the second case and don't reject it in the
- first case.
-
- >>> stats.permtest_1samp(rvs, 5.0)
- (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096]))
- >>> stats.permtest_1samp(rvs, 0.0)
- (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ]))
-
- Example using axis and non-scalar dimension for population mean
-
- >>> stats.permtest_1samp(rvs, [5.0, 0.0])
- (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ]))
- >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1)
- (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ]))
- """
- a, popmean, axis = _chk2_asarray(a, popmean, axis)
- rs = check_random_state(seed)
-
- if a.size == 0:
- return np.nan, np.nan
-
- # ensure popmean will broadcast to `a` correctly
- if popmean.ndim != a.ndim:
- popmean = np.expand_dims(popmean, axis=axis)
-
- # center `a` around `popmean` and calculate original mean
- zeroed = a - popmean
- true_mean = zeroed.mean(axis=axis) / 1
- abs_mean = np.abs(true_mean)
-
- # this for loop is not _the fastest_ but is memory efficient
- # the broadcasting alt. would mean storing zeroed.size * n_perm in memory
- permutations = np.ones(true_mean.shape)
- for _ in range(n_perm):
- flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip
- permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean
-
- pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean
-
- return true_mean, pvals
-
-
-def permtest_rel(a, b, axis=0, n_perm=1000, seed=0):
- """
- Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`.
-
- Generates two-tailed p-value for hypothesis of whether related samples `a`
- and `b` differ using permutation tests
-
- Parameters
- ----------
- a, b : array_like
- Sample observations. These arrays must have the same shape.
- axis : int or None, optional
- Axis along which to compute test. If None, compute over whole arrays
- of `a` and `b`. Default: 0
- n_perm : int, optional
- Number of permutations to assess. Unless `a` and `b` are very small
- along `axis` this will approximate a randomization test via Monte
- Carlo simulations. Default: 1000
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Set to None for "randomness".
- Default: 0
-
- Returns
- -------
- stat : float or numpy.ndarray
- Average difference between `a` and `b`
- pvalue : float or numpy.ndarray
- Non-parametric p-value
-
- Notes
- -----
- The lowest p-value that can be returned by this function is equal to 1 /
- (`n_perm` + 1).
-
- Examples
- --------
- >>> from netneurotools import stats
-
- >>> np.random.seed(12345678) # set random seed for reproducible results
- >>> rvs1 = np.random.normal(loc=5, scale=10, size=500)
- >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500)
- ... + np.random.normal(scale=0.2, size=500))
- >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP
- (-0.16506275161572695, 0.8021978021978022)
-
- >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500)
- ... + np.random.normal(scale=0.2, size=500))
- >>> stats.permtest_rel(rvs1, rvs3) # doctest: +SKIP
- (2.40533726097883, 0.000999000999000999)
- """
- a, b, axis = _chk2_asarray(a, b, axis)
- rs = check_random_state(seed)
-
- if a.shape[axis] != b.shape[axis]:
- raise ValueError('Provided arrays do not have same length along axis')
-
- if a.size == 0 or b.size == 0:
- return np.nan, np.nan
-
- # calculate original difference in means
- ab = np.stack([a, b], axis=0)
- if ab.ndim < 3:
- ab = np.expand_dims(ab, axis=-1)
- true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1
- abs_true = np.abs(true_diff)
-
- # idx array
- reidx = list(np.meshgrid(*[range(f) for f in ab.shape], indexing='ij'))
-
- permutations = np.ones(true_diff.shape)
- for _ in range(n_perm):
- # use this to re-index (i.e., swap along) the first axis of `ab`
- swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis)
- reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1)
- # recompute difference between `a` and `b` (i.e., first axis of `ab`)
- pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis)
- permutations += np.abs(pdiff) >= abs_true
-
- pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_diff
-
- return true_diff, pvals
-
-
-def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0):
- """
- Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`.
-
- Generates two-tailed p-value for hypothesis of whether samples `a` and `b`
- are correlated using permutation tests
-
- Parameters
- ----------
- a,b : (N[, M]) array_like
- Sample observations. These arrays must have the same length and either
- an equivalent number of columns or be broadcastable
- axis : int or None, optional
- Axis along which to compute test. If None, compute over whole arrays
- of `a` and `b`. Default: 0
- n_perm : int, optional
- Number of permutations to assess. Unless `a` and `b` are very small
- along `axis` this will approximate a randomization test via Monte
- Carlo simulations. Default: 1000
- resamples : (N, P) array_like, optional
- Resampling array used to shuffle `a` when generating null distribution
- of correlations. This array must have the same length as `a` and `b`
- and should have at least the same number of columns as `n_perm` (if it
- has more, only the first `n_perm` columns will be used). When not specified, a
- standard permutation is used to shuffle `a`. Default: None
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Set to None for "randomness".
- Default: 0
-
- Returns
- -------
- corr : float or numpy.ndarray
- Correlations
- pvalue : float or numpy.ndarray
- Non-parametric p-value
-
- Notes
- -----
- The lowest p-value that can be returned by this function is equal to 1 /
- (`n_perm` + 1).
-
- Examples
- --------
- >>> from netneurotools import datasets, stats
-
- >>> np.random.seed(12345678) # set random seed for reproducible results
- >>> x, y = datasets.make_correlated_xy(corr=0.1, size=100)
- >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP
- (0.10032564626876286, 0.3046953046953047)
-
- >>> x, y = datasets.make_correlated_xy(corr=0.5, size=100)
- >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP
- (0.500040365781984, 0.000999000999000999)
-
- Also works with multiple columns by either broadcasting the smaller array
- to the larger:
-
- >>> z = x + np.random.normal(loc=1, size=100)
- >>> stats.permtest_pearsonr(x, np.column_stack([y, z]))
- (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901]))
-
- or by using matching columns in the two arrays (e.g., `x` and `y` vs
- `a` and `b`):
-
- >>> a, b = datasets.make_correlated_xy(corr=0.9, size=100)
- >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b]))
- (array([0.50004037, 0.89927523]), array([0.000999, 0.000999]))
- """ # noqa
- a, b, axis = _chk2_asarray(a, b, axis)
- rs = check_random_state(seed)
-
- if len(a) != len(b):
- raise ValueError('Provided arrays do not have same length')
-
- if a.size == 0 or b.size == 0:
- return np.nan, np.nan
-
- if resamples is not None:
- if n_perm > resamples.shape[-1]:
- raise ValueError('Number of permutations requested exceeds size '
- 'of resampling array.')
-
- # divide by one forces coercion to float if ndim = 0
- true_corr = efficient_pearsonr(a, b)[0] / 1
- abs_true = np.abs(true_corr)
-
- permutations = np.ones(true_corr.shape)
- for perm in range(n_perm):
- # permute `a` and determine whether correlations exceed original
- if resamples is None:
- ap = a[rs.permutation(len(a))]
- else:
- ap = a[resamples[:, perm]]
- permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true
-
- pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr
-
- return true_corr, pvals
-
-
-def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'):
- """
- Compute correlation of matching columns in `a` and `b`.
-
- Parameters
- ----------
- a,b : array_like
- Sample observations. These arrays must have the same length and either
- an equivalent number of columns or be broadcastable
- ddof : int, optional
- Degrees of freedom correction in the calculation of the standard
- deviation. Default: 1
- nan_policy : str, optional
- Defines how to handle when input contains nan. 'propagate' returns nan,
- 'raise' throws an error, 'omit' performs the calculations ignoring nan
- values. Default: 'propagate'
-
- Returns
- -------
- corr : float or numpy.ndarray
- Pearson's correlation coefficient between matching columns of inputs
- pval : float or numpy.ndarray
- Two-tailed p-values
-
- Notes
- -----
- If either input contains nan and nan_policy is set to 'omit', both arrays
- will be masked to omit the nan entries.
-
- Examples
- --------
- >>> from netneurotools import datasets, stats
-
- Generate some not-very-correlated and some highly-correlated data:
-
- >>> np.random.seed(12345678) # set random seed for reproducible results
- >>> x1, y1 = datasets.make_correlated_xy(corr=0.1, size=100)
- >>> x2, y2 = datasets.make_correlated_xy(corr=0.8, size=100)
-
- Calculate both correlations simultaneously:
-
- >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2])
- (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23]))
- """
- a, b, axis = _chk2_asarray(a, b, 0)
- if len(a) != len(b):
- raise ValueError('Provided arrays do not have same length')
-
- if a.size == 0 or b.size == 0:
- return np.nan, np.nan
-
- if nan_policy not in ('propagate', 'raise', 'omit'):
- raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed')
-
- a, b = a.reshape(len(a), -1), b.reshape(len(b), -1)
- if (a.shape[1] != b.shape[1]):
- a, b = np.broadcast_arrays(a, b)
-
- mask = np.logical_or(np.isnan(a), np.isnan(b))
- if nan_policy == 'raise' and np.any(mask):
- raise ValueError('Input cannot contain NaN when nan_policy is "raise"')
- elif nan_policy == 'omit':
- # avoid making copies of the data, if possible
- a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan)
- b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan)
-
- with np.errstate(invalid='ignore'):
- corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy)
- * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy))
-
- sumfunc, n_obs = np.sum, len(a)
- if nan_policy == 'omit':
- corr = corr.filled(np.nan)
- sumfunc = np.nansum
- n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0))
-
- corr = sumfunc(corr, axis=0) / (n_obs - 1)
- corr = np.squeeze(np.clip(corr, -1, 1)) / 1
-
- # taken from scipy.stats
- ab = (n_obs / 2) - 1
- prob = 2 * special.btdtr(ab, ab, 0.5 * (1 - np.abs(corr)))
-
- return corr, prob
-
-
-def _gen_rotation(seed=None):
- """
- Generate random matrix for rotating spherical coordinates.
-
- Parameters
- ----------
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation
-
- Returns
- -------
- rotate_{l,r} : (3, 3) numpy.ndarray
- Rotations for left and right hemisphere coordinates, respectively
- """
- rs = check_random_state(seed)
-
- # for reflecting across Y-Z plane
- reflect = np.array([[-1, 0, 0], [0, 1, 0], [0, 0, 1]])
-
- # generate rotation for left
- rotate_l, temp = np.linalg.qr(rs.normal(size=(3, 3)))
- rotate_l = rotate_l @ np.diag(np.sign(np.diag(temp)))
- if np.linalg.det(rotate_l) < 0:
- rotate_l[:, 0] = -rotate_l[:, 0]
-
- # reflect the left rotation across Y-Z plane
- rotate_r = reflect @ rotate_l @ reflect
-
- return rotate_l, rotate_r
-
-
-def gen_spinsamples(coords, hemiid, n_rotate=1000, check_duplicates=True,
- method='original', exact=False, seed=None, verbose=False,
- return_cost=False):
- """
- Return a resampling array for `coords` obtained from rotations / spins.
-
- Using the method initially proposed in [ST1]_ (and later modified + updated
- based on findings in [ST2]_ and [ST3]_), this function applies random
- rotations to the user-supplied `coords` in order to generate a resampling
- array that preserves its spatial embedding. Rotations are generated for one
- hemisphere and mirrored for the other (see `hemiid` for more information).
-
- Due to irregular sampling of `coords` and the randomness of the rotations
- it is possible that some "rotations" may resample with replacement (i.e.,
- will not be a true permutation). The likelihood of this can be reduced by
- either increasing the sampling density of `coords` or changing the
- ``method`` parameter (see Notes for more information on the latter).
-
- Parameters
- ----------
- coords : (N, 3) array_like
- X, Y, Z coordinates of `N` nodes/parcels/regions/vertices defined on a
- sphere
- hemiid : (N,) array_like
- Array denoting hemisphere designation of coordinates in `coords`, where
- values should be {0, 1} denoting the different hemispheres. Rotations
- are generated for one hemisphere and mirrored across the y-axis for the
- other hemisphere.
- n_rotate : int, optional
- Number of rotations to generate. Default: 1000
- check_duplicates : bool, optional
- Whether to check for and attempt to avoid duplicate resamplings. A
- warning will be raised if duplicates cannot be avoided. Setting to
- True may increase the runtime of this function! Default: True
- method : {'original', 'vasa', 'hungarian'}, optional
- Method by which to match non- and rotated coordinates. Specifying
- 'original' will use the method described in [ST1]_. Specifying 'vasa'
- will use the method described in [ST4]_. Specifying 'hungarian' will use
- the Hungarian algorithm to minimize the global cost of reassignment
- (will dramatically increase runtime). Default: 'original'
- seed : {int, np.random.RandomState instance, None}, optional
- Seed for random number generation. Default: None
- verbose : bool, optional
- Whether to print occasional status messages. Default: False
- return_cost : bool, optional
- Whether to return cost array (specified as Euclidean distance) for each
- coordinate for each rotation. Default: False
-
- Returns
- -------
- spinsamples : (N, `n_rotate`) numpy.ndarray
- Resampling matrix to use in permuting data based on supplied `coords`.
- cost : (N, `n_rotate`,) numpy.ndarray
- Cost (specified as Euclidean distance) of re-assigning each coordinate
- for every rotation in `spinsamples`. Only provided if `return_cost` is
- True.
-
- Notes
- -----
- By default, this function uses the minimum Euclidean distance between the
- original coordinates and the new, rotated coordinates to generate a
- resampling array after each spin. Unfortunately, this can (with some
- frequency) lead to multiple coordinates being re-assigned the same value:
-
- >>> from netneurotools import stats as nnstats
- >>> coords = [[0, 0, 1], [1, 0, 0], [0, 0, 1], [1, 0, 0]]
- >>> hemi = [0, 0, 1, 1]
- >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1,
- ... method='original', check_duplicates=False)
- array([[0],
- [0],
- [2],
- [3]])
-
- While this is reasonable in most circumstances, if you feel incredibly
- strongly about having a perfect "permutation" (i.e., all indices appear
- once and exactly once in the resampling), you can set the ``method``
- parameter to either 'vasa' or 'hungarian':
-
- >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1,
- ... method='vasa', check_duplicates=False)
- array([[1],
- [0],
- [2],
- [3]])
- >>> nnstats.gen_spinsamples(coords, hemi, n_rotate=1, seed=1,
- ... method='hungarian', check_duplicates=False)
- array([[0],
- [1],
- [2],
- [3]])
-
- Note that setting this parameter may increase the runtime of the function
- (especially for `method='hungarian'`). Refer to [ST1]_ for information on
- why the default (i.e., ``method='original'``) suffices in most cases.
-
- For the original MATLAB implementation of this function refer to [ST5]_.
-
- References
- ----------
- .. [ST1] Alexander-Bloch, A., Shou, H., Liu, S., Satterthwaite, T. D.,
- Glahn, D. C., Shinohara, R. T., Vandekar, S. N., & Raznahan, A. (2018).
- On testing for spatial correspondence between maps of human brain
- structure and function. NeuroImage, 178, 540-51.
-
- .. [ST2] Blaser, R., & Fryzlewicz, P. (2016). Random Rotation Ensembles.
- Journal of Machine Learning Research, 17(4), 1–26.
-
- .. [ST3] Lefèvre, J., Pepe, A., Muscato, J., De Guio, F., Girard, N.,
- Auzias, G., & Germanaud, D. (2018). SPANOL (SPectral ANalysis of Lobes):
- A Spectral Clustering Framework for Individual and Group Parcellation of
- Cortical Surfaces in Lobes. Frontiers in Neuroscience, 12, 354.
-
- .. [ST4] Váša, F., Seidlitz, J., Romero-Garcia, R., Whitaker, K. J.,
- Rosenthal, G., Vértes, P. E., ... & Jones, P. B. (2018). Adolescent
- tuning of association cortex in human structural brain networks.
- Cerebral Cortex, 28(1), 281-294.
-
- .. [ST5] https://github.com/spin-test/spin-test
- """
- methods = ['original', 'vasa', 'hungarian']
- if method not in methods:
- raise ValueError('Provided method "{}" invalid. Must be one of {}.'
- .format(method, methods))
-
- if exact:
- warnings.warn('The `exact` parameter will no longer be supported in '
- 'an upcoming release. Please use the `method` parameter '
- 'instead.', DeprecationWarning, stacklevel=3)
- if exact == 'vasa' and method == 'original':
- method = 'vasa'
- elif exact and method == 'original':
- method = 'hungarian'
-
- seed = check_random_state(seed)
-
- coords = np.asanyarray(coords)
- hemiid = np.squeeze(np.asanyarray(hemiid, dtype='int8'))
-
- # check supplied coordinate shape
- if coords.shape[-1] != 3 or coords.squeeze().ndim != 2:
- raise ValueError('Provided `coords` must be of shape (N, 3), not {}'
- .format(coords.shape))
-
- # ensure hemisphere designation array is correct
- if hemiid.ndim != 1:
- raise ValueError('Provided `hemiid` array must be one-dimensional.')
- if len(coords) != len(hemiid):
- raise ValueError('Provided `coords` and `hemiid` must have the same '
- 'length. Provided lengths: coords = {}, hemiid = {}'
- .format(len(coords), len(hemiid)))
- if np.max(hemiid) > 1 or np.min(hemiid) < 0:
- raise ValueError('Hemiid must have values in {0, 1} denoting left and '
- 'right hemisphere coordinates, respectively. '
- + 'Provided array contains values: {}'
- .format(np.unique(hemiid)))
-
- # empty array to store resampling indices
- spinsamples = np.zeros((len(coords), n_rotate), dtype=int)
- cost = np.zeros((len(coords), n_rotate))
- inds = np.arange(len(coords), dtype=int)
-
- # generate rotations and resampling array!
- msg, warned = '', False
- for n in range(n_rotate):
- count, duplicated = 0, True
-
- if verbose:
- msg = 'Generating spin {:>5} of {:>5}'.format(n, n_rotate)
- print(msg, end='\r', flush=True)
-
- while duplicated and count < 500:
- count, duplicated = count + 1, False
- resampled = np.zeros(len(coords), dtype='int32')
-
- # rotate each hemisphere separately
- for h, rot in enumerate(_gen_rotation(seed=seed)):
- hinds = (hemiid == h)
- coor = coords[hinds]
- if len(coor) == 0:
- continue
-
- # if we need an "exact" mapping (i.e., each node needs to be
- # assigned EXACTLY once) then we have to calculate the full
- # distance matrix which is a nightmare with respect to memory
- # for anything that isn't parcellated data.
- # that is, don't do this with vertex coordinates!
- if method == 'vasa':
- dist = spatial.distance_matrix(coor, coor @ rot)
- # min of max a la Vasa et al., 2018
- col = np.zeros(len(coor), dtype='int32')
- for _ in range(len(dist)):
- # find parcel whose closest neighbor is farthest away
- # overall; assign to that
- row = dist.min(axis=1).argmax()
- col[row] = dist[row].argmin()
- cost[inds[hinds][row], n] = dist[row, col[row]]
- # set to -inf and inf so they can't be assigned again
- dist[row] = -np.inf
- dist[:, col[row]] = np.inf
- # optimization of total cost using Hungarian algorithm. this
- # may result in certain parcels having higher cost than with
- # `method='vasa'` but should always result in the total cost
- # being lower #tradeoffs
- elif method == 'hungarian':
- dist = spatial.distance_matrix(coor, coor @ rot)
- row, col = optimize.linear_sum_assignment(dist)
- cost[hinds, n] = dist[row, col]
- # if nodes can be assigned multiple targets, we can simply use
- # the absolute minimum of the distances (no optimization
- # required) which is _much_ lighter on memory
- # huge thanks to https://stackoverflow.com/a/47779290 for this
- # memory-efficient method
- elif method == 'original':
- dist, col = spatial.cKDTree(coor @ rot).query(coor, 1)
- cost[hinds, n] = dist
-
- resampled[hinds] = inds[hinds][col]
-
- # if we want to check for duplicates ensure that we don't have any
- if check_duplicates:
- if np.any(np.all(resampled[:, None] == spinsamples[:, :n], 0)):
- duplicated = True
- # if our "spin" is identical to the input then that's no good
- elif np.all(resampled == inds):
- duplicated = True
-
- # if we broke out because we tried 500 rotations and couldn't generate
- # a new one, warn that we're using duplicate rotations and give up.
- # this should only be triggered if check_duplicates is set to True
- if count == 500 and not warned:
- warnings.warn(
- 'Duplicate rotations used. Check resampling array '
- 'to determine real number of unique permutations.', stacklevel=2)
- warned = True
-
- spinsamples[:, n] = resampled
-
- if verbose:
- print(' ' * len(msg) + '\b' * len(msg), end='', flush=True)
-
- if return_cost:
- return spinsamples, cost
-
- return spinsamples
-
-
-def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1):
- """
- Return the dominance analysis statistics for multilinear regression.
-
- This is a rewritten & simplified version of [DA1]_. It is briefly
- tested against the original package, but still in early stages.
- Please feel free to report any bugs.
-
- Warning: Still work-in-progress. Parameters might change!
-
- Parameters
- ----------
- X : (N, M) array_like
- Input data
- y : (N,) array_like
- Target values
- use_adjusted_r_sq : bool, optional
- Whether to use adjusted r squares. Default: True
- verbose : bool, optional
- Whether to print debug messages. Default: False
- n_jobs : int, optional
- The number of jobs to run in parallel. Default: 1
-
- Returns
- -------
- model_metrics : dict
- The dominance metrics, currently containing `individual_dominance`,
- `partial_dominance`, `total_dominance`, and `full_r_sq`.
- model_r_sq : dict
- Contains all model r squares
-
- Notes
- -----
- Example usage
-
- .. code:: python
-
- from netneurotools.stats import get_dominance_stats
- from sklearn.datasets import load_boston
- X, y = load_boston(return_X_y=True)
- model_metrics, model_r_sq = get_dominance_stats(X, y)
-
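- Note that ``load_boston`` has been removed from recent scikit-learn releases;
- a minimal sketch of the same call on a synthetic regression dataset:
-
- .. code:: python
-
- from sklearn.datasets import make_regression
- X, y = make_regression(n_samples=200, n_features=5, random_state=0)
- model_metrics, model_r_sq = get_dominance_stats(X, y)
-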
- To compare with [DA1]_, use `use_adjusted_r_sq=False`
-
- .. code:: python
-
- from dominance_analysis import Dominance_Datasets
- from dominance_analysis import Dominance
- boston_dataset=Dominance_Datasets.get_boston()
- dominance_regression=Dominance(data=boston_dataset,
- target='House_Price',objective=1)
- incr_variable_rsquare=dominance_regression.incremental_rsquare()
- dominance_regression.dominance_stats()
-
- References
- ----------
- .. [DA1] https://github.com/dominance-analysis/dominance-analysis
-
- """
- # this helps to remove one element from a tuple
- def remove_ret(tpl, elem):
- lst = list(tpl)
- lst.remove(elem)
- return tuple(lst)
-
- # sklearn linear regression wrapper
- def get_reg_r_sq(X, y, use_adjusted_r_sq=True):
- lin_reg = LinearRegression()
- lin_reg.fit(X, y)
- yhat = lin_reg.predict(X)
- SS_Residual = sum((y - yhat) ** 2)
- SS_Total = sum((y - np.mean(y)) ** 2)
- r_squared = 1 - (float(SS_Residual)) / SS_Total
- adjusted_r_squared = 1 - (1 - r_squared) * \
- (len(y) - 1) / (len(y) - X.shape[1] - 1)
- if use_adjusted_r_sq:
- return adjusted_r_squared
- else:
- return r_squared
-
- # helper function to compute r_sq for a given idx_tuple
- def compute_r_sq(idx_tuple):
- return idx_tuple, get_reg_r_sq(X[:, idx_tuple],
- y,
- use_adjusted_r_sq=use_adjusted_r_sq)
-
- # generate all predictor combinations in list (num of predictors) of lists
- n_predictor = X.shape[-1]
- # n_comb_len_group = n_predictor - 1
- predictor_combs = [list(combinations(range(n_predictor), i))
- for i in range(1, n_predictor + 1)]
- if verbose:
- print(f"[Dominance analysis] Generated \
- {len([v for i in predictor_combs for v in i])} combinations")
-
- model_r_sq = dict()
- results = Parallel(n_jobs=n_jobs)(
- delayed(compute_r_sq)(idx_tuple)
- for len_group in tqdm(predictor_combs,
- desc='num-of-predictor loop',
- disable=not verbose)
- for idx_tuple in tqdm(len_group,
- desc='insider loop',
- disable=not verbose))
-
- # extract r_sq from results
- for idx_tuple, r_sq in results:
- model_r_sq[idx_tuple] = r_sq
-
- if verbose:
- print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's")
-
- # getting all model metrics
- model_metrics = dict([])
-
- # individual dominance
- individual_dominance = []
- for i_pred in range(n_predictor):
- individual_dominance.append(model_r_sq[(i_pred,)])
- individual_dominance = np.array(individual_dominance).reshape(1, -1)
- model_metrics["individual_dominance"] = individual_dominance
-
- # partial dominance
- partial_dominance = [[] for _ in range(n_predictor - 1)]
- for i_len in range(n_predictor - 1):
- i_len_combs = list(combinations(range(n_predictor), i_len + 2))
- for j_node in range(n_predictor):
- j_node_sel = [v for v in i_len_combs if j_node in v]
- reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel]
- diff_values = [
- model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]]
- for i in range(len(reduced_list))]
- partial_dominance[i_len].append(np.mean(diff_values))
-
- # save partial dominance
- partial_dominance = np.array(partial_dominance)
- model_metrics["partial_dominance"] = partial_dominance
- # get total dominance
- total_dominance = np.mean(
- np.r_[individual_dominance, partial_dominance], axis=0)
- # test and save total dominance
- assert np.allclose(total_dominance.sum(),
- model_r_sq[tuple(range(n_predictor))]), \
- "Sum of total dominance is not equal to full r square!"
- model_metrics["total_dominance"] = total_dominance
- # save full r^2
- model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))]
-
- return model_metrics, model_r_sq
-
-
-def network_pearsonr(annot1, annot2, weight):
- r"""
- Calculate Pearson correlation between two annotation vectors.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- annot1 : (N,) array_like
- First annotation vector, demean will be applied.
- annot2 : (N,) array_like
- Second annotation vector, demean will be applied.
- weight : (N, N) array_like
- Weight matrix. Diagonal elements should be 1.
-
- Returns
- -------
- corr : float
- Network correlation between `annot1` and `annot2`
-
- Notes
- -----
- If Pearson correlation is represented as
-
- .. math::
- \rho_{x,y} = \dfrac{
- \mathrm{sum}(I \times (\hat{x} \otimes \hat{y}))
- }{
- \sigma_x \sigma_y
- }
-
- The network correlation is defined analogously as
-
- .. math::
- \rho_{x,y,G} = \dfrac{
- \mathrm{sum}(W \times (\hat{x} \otimes \hat{y}))
- }{
- \sigma_{x,W} \sigma_{y,W}
- }
-
- where :math:`\hat{x}` and :math:`\hat{y}` are the demeaned annotation vectors,
- and :math:`\sigma_{x,W}`, :math:`\sigma_{y,W}` are their weighted standard deviations.
-
- The weight matrix :math:`W` is used to represent the network structure.
- It is usually in the form of :math:`W = \exp(-kL)` where :math:`L` is the
- length matrix and :math:`k` is a decay parameter.
-
- Example using shortest path length as weight
-
- .. code:: python
-
- spl, _ = distance_wei_floyd(D) # input should be distance matrix
- spl_wei = 1 / np.exp(spl)
- netcorr = network_pearsonr(annot1, annot2, spl_wei)
-
- Example using (inverse) effective resistance as weight
-
- .. code:: python
-
- R_eff = effective_resistance(W)
- R_eff_norm = R_eff / np.max(R_eff)
- W = 1 / R_eff_norm
- W = W / np.max(W)
- np.fill_diagonal(W, 1.0)
- netcorr = network_pearsonr(annot1, annot2, W)
-
- References
- ----------
- .. [1] Coscia, M. (2021). Pearson correlations on complex networks.
- Journal of Complex Networks, 9(6), cnab036.
- https://doi.org/10.1093/comnet/cnab036
-
-
- See Also
- --------
- netneurotools.stats.network_pearsonr_pairwise
- """
- annot1 = annot1 - np.mean(annot1)
- annot2 = annot2 - np.mean(annot2)
- upper = np.sum(np.multiply(weight, np.outer(annot1, annot2)))
- lower1 = np.sum(np.multiply(weight, np.outer(annot1, annot1)))
- lower2 = np.sum(np.multiply(weight, np.outer(annot2, annot2)))
- return upper / np.sqrt(lower1) / np.sqrt(lower2)
-
-
-def network_pearsonr_numba(annot1, annot2, weight):
- """
- Numba version of :meth:`netneurotools.stats.network_pearsonr`.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- annot1 : (N,) array_like
- First annotation vector, demean will be applied.
- annot2 : (N,) array_like
- Second annotation vector, demean will be applied.
- weight : (N, N) array_like
- Weight matrix. Diagonal elements should be 1.
-
- Returns
- -------
- corr : float
- Network correlation between `annot1` and `annot2`
- """
- n = annot1.shape[0]
- annot1 = annot1 - np.mean(annot1)
- annot2 = annot2 - np.mean(annot2)
- upper, lower1, lower2 = 0.0, 0.0, 0.0
- for i in range(n):
- for j in range(n):
- upper += annot1[i] * annot2[j] * weight[i, j]
- lower1 += annot1[i] * annot1[j] * weight[i, j]
- lower2 += annot2[i] * annot2[j] * weight[i, j]
- return upper / np.sqrt(lower1) / np.sqrt(lower2)
-
-
-if use_numba:
- network_pearsonr_numba = njit(network_pearsonr_numba)
-
-
-def _cross_outer(annot_mat):
- """
- Calculate cross outer product of input matrix.
-
- This function is only used in `network_pearsonr_pairwise`.
-
- Parameters
- ----------
- annot_mat : (N, D) array_like
- Input matrix
-
- Returns
- -------
- cross_outer : (N, N, D, D) numpy.ndarray
- Cross outer product of `annot_mat`
- """
- n_samp, n_feat = annot_mat.shape
- cross_outer = np.empty((n_samp, n_samp, n_feat, n_feat), annot_mat.dtype)
- for a in range(n_samp):
- for b in range(n_samp):
- for c in range(n_feat):
- for d in range(n_feat):
- cross_outer[a, b, c, d] = annot_mat[a, c] * annot_mat[b, d]
- return cross_outer
-
-
-if use_numba:
- # ("float64[:,:,:,::1](float64[:,::1])")
- _cross_outer = njit(_cross_outer)
-
-
-def _multiply_sum(cross_outer, weight):
- """
- Multiply and sum cross outer product.
-
- This function is only used in `network_pearsonr_pairwise`.
-
- Parameters
- ----------
- cross_outer : (N, N, D, D) array_like
- Cross outer product of `annot_mat`
- weight : (D, D) array_like
- Weight matrix
-
- Returns
- -------
- cross_outer_after : (N, N) numpy.ndarray
- Result of multiplying and summing `cross_outer`
- """
- n_samp, _, n_dim, _ = cross_outer.shape
- cross_outer_after = np.empty((n_samp, n_samp), cross_outer.dtype)
- for i in range(n_samp):
- for j in range(n_samp):
- curr_sum = 0.0
- for k in range(n_dim):
- for l in range(n_dim): # noqa: E741
- curr_sum += weight[k, l] * cross_outer[i, j, k, l]
- cross_outer_after[i, j] = curr_sum
- return cross_outer_after
-
-
-if use_numba:
- # ("float64[:,::1](float64[:,:,:,::1],float64[:,::1])")
- _multiply_sum = njit(_multiply_sum)
-
-
-def network_pearsonr_pairwise(annot_mat, weight):
- """
- Calculate pairwise network correlation between rows of `annot_mat`.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- annot_mat : (N, D) array_like
- Input matrix
- weight : (D, D) array_like
- Weight matrix. Diagonal elements should be 1.
-
- Returns
- -------
- corr_mat : (N, N) numpy.ndarray
- Pairwise network correlation matrix
-
- Notes
- -----
- This is a faster version of :meth:`netneurotools.stats.network_pearsonr`
- for calculating pairwise network correlation between rows of `annot_mat`.
- Check :meth:`netneurotools.stats.network_pearsonr` for details.
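-
- A minimal call sketch (assuming ``annot_mat`` stacks one annotation vector per row):
-
- .. code:: python
-
- corr_mat = network_pearsonr_pairwise(annot_mat, weight)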
-
- See Also
- --------
- netneurotools.stats.network_pearsonr
- """
- annot_mat_demean = annot_mat - np.mean(annot_mat, axis=1, keepdims=True)
- if use_numba:
- cross_outer = _cross_outer(annot_mat_demean)
- cross_outer_after = _multiply_sum(cross_outer, weight)
- else:
- # https://stackoverflow.com/questions/24839481/python-matrix-outer-product
- cross_outer = np.einsum('ac,bd->abcd', annot_mat_demean, annot_mat_demean)
- cross_outer_after = np.sum(np.multiply(cross_outer, weight), axis=(2, 3))
- # translating the two lines below in numba does not speed up much
- lower = np.sqrt(np.diagonal(cross_outer_after))
- return cross_outer_after / np.einsum('i,j', lower, lower)
-
-
-def _onehot_quadratic_form_broadcast(Q_star):
- """
- Calculate one-hot quadratic form of input matrix.
-
- This function is only used in `effective_resistance`.
-
- Parameters
- ----------
- Q_star : (N, N) array_like
- Input matrix
-
- Returns
- -------
- R_eff : (N, N) numpy.ndarray
- One-hot quadratic form of `Q_star`
- """
- n = Q_star.shape[0]
- R_eff = np.empty((n, n), Q_star.dtype)
- for i in range(n):
- for j in range(n):
- R_eff[i, j] = Q_star[i, i] - Q_star[j, i] - Q_star[i, j] + Q_star[j, j]
- return R_eff
-
-
-if use_numba:
- # ("float64[:,::1](float64[:,::1])")
- _onehot_quadratic_form_broadcast = njit(_onehot_quadratic_form_broadcast)
-
-
-def effective_resistance(W, directed=True):
- """
- Calculate effective resistance matrix.
-
- The effective resistance between two nodes in a graph, often used in the context
- of electrical networks, is a measure that stems from the inverse of the Laplacian
- matrix of the graph.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- W : (N, N) array_like
- Weight matrix.
- directed : bool, optional
- Whether the graph is directed. This is used to determine whether to turn on
- the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are
- using a symmetric, real-valued weight matrix (which is therefore Hermitian), you
- can set this to False for better performance. Default: True
-
- Returns
- -------
- R_eff : (N, N) numpy.ndarray
- Effective resistance matrix
-
- Notes
- -----
- The effective resistance between two nodes :math:`i` and :math:`j` is defined as
-
- .. math::
- R_{ij} = (e_i - e_j)^T Q^* (e_i - e_j)
-
- where :math:`Q^*` is the Moore-Penrose pseudoinverse of the Laplacian matrix
- :math:`L` of the graph, and :math:`e_i` is the :math:`i`-th standard basis vector.
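-
- A minimal usage sketch (assuming ``W`` is a symmetric, real-valued weight matrix,
- so ``directed=False`` can be used):
-
- .. code:: python
-
- R_eff = effective_resistance(W, directed=False)
- R_eff_norm = R_eff / np.max(R_eff)  # optionally rescale to [0, 1]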
-
- References
- ----------
- .. [1] Ellens, W., Spieksma, F. M., Van Mieghem, P., Jamakovic, A., & Kooij,
- R. E. (2011). Effective graph resistance. Linear Algebra and Its Applications,
- 435(10), 2491–2506. https://doi.org/10.1016/j.laa.2011.02.024
-
- See Also
- --------
- netneurotools.stats.network_polarisation
- """
- L = _graph_laplacian(W)
- Q_star = np.linalg.pinv(L, hermitian=not directed)
- if use_numba:
- R_eff = _onehot_quadratic_form_broadcast(Q_star)
- else:
- Q_star_diag = np.diag(Q_star)
- R_eff = \
- Q_star_diag[:, np.newaxis] \
- - Q_star \
- - Q_star.T \
- + Q_star_diag[np.newaxis, :]
- return R_eff
-
-
-def _polariz_diff(vec):
- """
- Calculate difference between positive and negative parts of a vector.
-
- This function is only used in `network_polarisation`.
-
- Parameters
- ----------
- vec : (N,) array_like
- Input vector. Must have both positive and negative values.
-
- Returns
- -------
- vec_diff : (N,) numpy.ndarray
- Difference between positive and negative parts of `vec`
- """
- # positive part, rescaled so its maximum is 1
- vec_pos = np.maximum(vec, 0.0)
- vec_pos /= np.max(vec_pos)
- # absolute value of the negative part, rescaled so its maximum is 1
- vec_neg = np.minimum(vec, 0.0)
- vec_neg = np.abs(vec_neg)
- vec_neg /= np.max(vec_neg)
- return (vec_pos - vec_neg)
-
-
-if use_numba:
- _polariz_diff = njit(_polariz_diff)
-
-
-def _quadratic_form(W, vec_left, vec_right, squared=False):
- """
- Calculate quadratic form :math:`v_{left}^T W v_{right}`.
-
- Parameters
- ----------
- W : (N, N) array_like
- Input matrix.
- vec_left : (N,) array_like
- Left weight vector.
- vec_right : (N,) array_like
- Right weight vector.
- squared : bool, optional
- Whether to square the input weight matrix. Default: False
-
- Returns
- -------
- quadratic_form : float
- Quadratic form from `W`, `vec_left`, and `vec_right`
- """
- # numpy equivalent: vec_left.T @ W @ vec_right
- # the explicit loops below keep this function numba-compatible
- n = W.shape[0]
- ret = 0.0
- for i in range(n):
- for j in range(n):
- if squared:
- ret += vec_left[i] * vec_right[j] * W[i, j]**2
- else:
- ret += vec_left[i] * vec_right[j] * W[i, j]
- return ret
-
-
-if use_numba:
- _quadratic_form = njit(_quadratic_form)
-
-
-def network_polarisation(vec, W, directed=True):
- r"""
- Calculate polarisation of a vector on a graph.
-
- Network polarisation is a measure of polarization that takes into account all
- three of the factors below [1]_:
-
- - how extreme the opinions of the people are
- - how much they organize into echo chambers, and
- - how these echo chambers organize in the network
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- vec : (N,) array_like
- Polarization vector. Must have both positive and negative values. Will be
- normalized between -1 and 1 internally.
- W : (N, N) array_like
- Weight matrix.
- directed : bool, optional
- Whether the graph is directed. This is used to determine whether to turn on
- the :code:`hermitian=True` option in :func:`numpy.linalg.pinv`. When you are
- using a symmetric, real-valued weight matrix (which is therefore Hermitian), you
- can set this to False for better performance. Default: True
-
- Returns
- -------
- polariz : float
- Polarization of `vec` on `W`
-
- Notes
- -----
- The measure is based on the generalized Euclidean distance, defined as
-
- .. math::
- \delta_{G, o} = \sqrt{(o^+ - o^-)^T Q^* (o^+ - o^-)}
-
- where :math:`o^+` and :math:`o^-` are the positive and negative parts of the
- polarization vector, and :math:`Q^*` is the Moore-Penrose pseudoinverse
- of the Laplacian matrix :math:`L` of the graph. Check :func:`effective_resistance`
- for similarity.
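-
- A minimal usage sketch (assuming ``opinions`` holds both positive and negative
- values and ``W`` is a symmetric, real-valued weight matrix):
-
- .. code:: python
-
- polariz = network_polarisation(opinions, W, directed=False)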
-
- References
- ----------
- .. [1] Hohmann, M., Devriendt, K., & Coscia, M. (2023). Quantifying ideological
- polarization on a network using generalized Euclidean distance. Science Advances,
- 9(9), eabq2044. https://doi.org/10.1126/sciadv.abq2044
-
- See Also
- --------
- netneurotools.stats.effective_resistance
- """
- L = _graph_laplacian(W)
- Q_star = np.linalg.pinv(L, hermitian=not directed)
- diff = _polariz_diff(vec)
- if use_numba:
- polariz_sq = _quadratic_form(Q_star, diff, diff, squared=False)
- else:
- polariz_sq = (diff.T @ Q_star @ diff)
- return np.sqrt(polariz_sq)
-
-
-def network_variance(vec, D):
- r"""
- Calculate variance of a vector on a graph.
-
- Network variance is a measure of variance that takes into account the network
- structure.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- vec : (N,) array_like
- Input vector. Must be all positive.
- Will be normalized internally as a probability distribution.
- D : (N, N) array_like
- Distance matrix.
-
- Returns
- -------
- network_variance : float
- Network variance of `vec` on `D`
-
- Notes
- -----
- The network variance is defined as
-
- .. math::
- var(p) = \frac{1}{2} \sum_{i, j} p(i) p(j) d^2(i,j)
-
- where :math:`p` is the probability distribution of `vec`, and :math:`d(i,j)`
- is the distance between node :math:`i` and :math:`j`.
-
- The distance matrix :math:`D` can make use of effective resistance or its
- square root.
-
- Example using effective resistance as weight matrix
-
- .. code:: python
-
- R_eff = effective_resistance(W)
- netvar = network_variance(vec, R_eff)
-
- References
- ----------
- .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022).
- Variance and covariance of distributions on graphs. SIAM Review, 64(2),
- 343–359. https://doi.org/10.1137/20M1361328
-
- See Also
- --------
- netneurotools.stats.network_covariance
- """
- p = vec / np.sum(vec)
- return 0.5 * (p.T @ np.multiply(D, D) @ p)
-
-
-def network_variance_numba(vec, D):
- """
- Numba version of :meth:`netneurotools.stats.network_variance`.
-
- Network variance is a measure of variance that takes into account the network
- structure.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- vec : (N,) array_like
- Input vector. Must be all positive.
- Will be normalized internally as a probability distribution.
- D : (N, N) array_like
- Distance matrix.
-
- Returns
- -------
- network_variance : float
- Network variance of `vec` on `D`
- """
- p = vec / np.sum(vec)
- return 0.5 * _quadratic_form(D, p, p, squared=True)
-
-
-if use_numba:
- network_variance_numba = njit(network_variance_numba)
-
-
-def network_covariance(joint_pmat, D, calc_marginal=True):
- r"""
- Calculate covariance of a joint probability matrix on a graph.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- joint_pmat : (N, N) array_like
- Joint probability matrix. Please make sure that it is valid.
- D : (N, N) array_like
- Distance matrix.
- calc_marginal : bool, optional
- Whether to calculate marginal variance. It will be marginally faster if
- :code:`calc_marginal=False` (returning marginal variances as 0). Default: True
-
- Returns
- -------
- network_covariance : float
- Covariance of `joint_pmat` on `D`
- var_p : float
- Marginal variance of `joint_pmat` on `D`.
- Will be 0 if :code:`calc_marginal=False`
- var_q : float
- Marginal variance of `joint_pmat` on `D`.
- Will be 0 if :code:`calc_marginal=False`
-
- Notes
- -----
- The network covariance is defined as
-
- .. math::
- cov(P) = \frac{1}{2} \sum_{i, j} [p(i) q(j) - P(i,j)] d^2(i,j)
-
- where :math:`P` is the joint probability matrix, :math:`p` and :math:`q`
- are the marginal probability distributions of `joint_pmat`, and :math:`d(i,j)`
- is the distance between node :math:`i` and :math:`j`.
-
- Check :func:`network_variance` for usage.
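-
- A minimal call sketch (assuming ``joint_pmat`` is a valid joint probability
- matrix, i.e. non-negative and summing to 1):
-
- .. code:: python
-
- cov, var_p, var_q = network_covariance(joint_pmat, D)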
-
- References
- ----------
- .. [1] Devriendt, K., Martin-Gutierrez, S., & Lambiotte, R. (2022).
- Variance and covariance of distributions on graphs. SIAM Review, 64(2),
- 343–359. https://doi.org/10.1137/20M1361328
-
- See Also
- --------
- netneurotools.stats.network_variance
- """
- p = np.sum(joint_pmat, axis=1)
- q = np.sum(joint_pmat, axis=0)
- D_sq = np.multiply(D, D)
- cov = p.T @ D_sq @ q - np.sum(np.multiply(joint_pmat, D_sq))
- if calc_marginal:
- var_p = p.T @ D_sq @ p
- var_q = q.T @ D_sq @ q
- else:
- var_p, var_q = 0, 0
- return 0.5 * cov, 0.5 * var_p, 0.5 * var_q
-
-
-def network_covariance_numba(joint_pmat, D, calc_marginal=True):
- """
- Numba version of :meth:`netneurotools.stats.network_covariance`.
-
- .. warning::
- Test before use.
-
- Parameters
- ----------
- joint_pmat : (N, N) array_like
- Joint probability matrix. Please make sure that it is valid.
- D : (N, N) array_like
- Distance matrix.
- calc_marginal : bool, optional
- Whether to calculate marginal variance. It will be marginally faster if
- :code:`calc_marginal=False` (returning marginal variances as 0). Default: True
-
- Returns
- -------
- network_covariance : float
- Covariance of `joint_pmat` on `D`
- var_p : float
- Marginal variance of `joint_pmat` on `D`.
- Will be 0 if :code:`calc_marginal=False`
- var_q : float
- Marginal variance of `joint_pmat` on `D`.
- Will be 0 if :code:`calc_marginal=False`
- """
- n = joint_pmat.shape[0]
- p = np.sum(joint_pmat, axis=1)
- q = np.sum(joint_pmat, axis=0)
- cov = 0.0
- var_p, var_q = 0.0, 0.0
- for i in range(n):
- for j in range(n):
- cov += (p[i] * q[j] - joint_pmat[i, j]) * D[i, j]**2
- if calc_marginal:
- var_p += p[i] * p[j] * D[i, j]**2
- var_q += q[i] * q[j] * D[i, j]**2
- return 0.5 * cov, 0.5 * var_p, 0.5 * var_q
-
-
-if use_numba:
- network_covariance_numba = njit(network_covariance_numba)
diff --git a/netneurotools/stats/__init__.py b/netneurotools/stats/__init__.py
new file mode 100644
index 0000000..bc35e4d
--- /dev/null
+++ b/netneurotools/stats/__init__.py
@@ -0,0 +1,36 @@
+"""Functions for performing statistical operations."""
+
+
+from .correlation import (
+ efficient_pearsonr,
+ weighted_pearsonr,
+ make_correlated_xy
+)
+
+
+from .permutation_test import (
+ permtest_1samp,
+ permtest_rel,
+ permtest_pearsonr
+)
+
+
+from .regression import (
+ _add_constant,
+ residualize,
+ get_dominance_stats
+)
+
+
+# from .stats_utils import ()
+
+
+__all__ = [
+ # correlation
+ 'efficient_pearsonr', 'weighted_pearsonr', 'make_correlated_xy',
+ # permutation_test
+ 'permtest_1samp', 'permtest_rel', 'permtest_pearsonr',
+ # regression
+ '_add_constant', 'residualize', 'get_dominance_stats',
+ # stats_utils
+]
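+
+# Example of the refactored import paths (a sketch; these names are re-exported
+# from the submodules imported above):
+#
+#   from netneurotools.stats import efficient_pearsonr, permtest_pearsonr
+#   corr, pval = efficient_pearsonr(x, y)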
diff --git a/netneurotools/stats/correlation.py b/netneurotools/stats/correlation.py
new file mode 100644
index 0000000..38489fd
--- /dev/null
+++ b/netneurotools/stats/correlation.py
@@ -0,0 +1,189 @@
+"""Functions for calculating correlation."""
+
+import numpy as np
+import scipy.stats as sstats
+import scipy.special as sspecial
+from sklearn.utils.validation import check_random_state
+
+try: # scipy >= 1.8.0
+ from scipy.stats._stats_py import _chk2_asarray
+except ImportError: # scipy < 1.8.0
+ from scipy.stats.stats import _chk2_asarray
+
+
+def efficient_pearsonr(a, b, ddof=1, nan_policy='propagate'):
+ """
+ Compute correlation of matching columns in `a` and `b`.
+
+ Parameters
+ ----------
+ a,b : array_like
+ Sample observations. These arrays must have the same length and either
+ an equivalent number of columns or be broadcastable
+ ddof : int, optional
+ Degrees of freedom correction in the calculation of the standard
+ deviation. Default: 1
+ nan_policy : str, optional
+ Defines how to handle when input contains nan. 'propagate' returns nan,
+ 'raise' throws an error, 'omit' performs the calculations ignoring nan
+ values. Default: 'propagate'
+
+ Returns
+ -------
+ corr : float or numpy.ndarray
+ Pearson's correlation coefficient between matching columns of inputs
+ pval : float or numpy.ndarray
+ Two-tailed p-values
+
+ Notes
+ -----
+ If either input contains nan and nan_policy is set to 'omit', both arrays
+ will be masked to omit the nan entries.
+
+ Examples
+ --------
+ >>> from netneurotools import stats
+
+ Generate some not-very-correlated and some highly-correlated data:
+
+ >>> np.random.seed(12345678) # set random seed for reproducible results
+ >>> x1, y1 = stats.make_correlated_xy(corr=0.1, size=100)
+ >>> x2, y2 = stats.make_correlated_xy(corr=0.8, size=100)
+
+ Calculate both correlations simultaneously:
+
+ >>> stats.efficient_pearsonr(np.c_[x1, x2], np.c_[y1, y2])
+ (array([0.10032565, 0.79961189]), array([3.20636135e-01, 1.97429944e-23]))
+ """
+ a, b, _ = _chk2_asarray(a, b, 0)
+ if len(a) != len(b):
+ raise ValueError('Provided arrays do not have same length')
+
+ if a.size == 0 or b.size == 0:
+ return np.nan, np.nan
+
+ if nan_policy not in ('propagate', 'raise', 'omit'):
+ raise ValueError(f'Value for nan_policy "{nan_policy}" not allowed')
+
+ a, b = a.reshape(len(a), -1), b.reshape(len(b), -1)
+ if (a.shape[1] != b.shape[1]):
+ a, b = np.broadcast_arrays(a, b)
+
+ mask = np.logical_or(np.isnan(a), np.isnan(b))
+ if nan_policy == 'raise' and np.any(mask):
+ raise ValueError('Input cannot contain NaN when nan_policy is "raise"')
+ elif nan_policy == 'omit':
+ # avoid making copies of the data, if possible
+ a = np.ma.masked_array(a, mask, copy=False, fill_value=np.nan)
+ b = np.ma.masked_array(b, mask, copy=False, fill_value=np.nan)
+
+ with np.errstate(invalid='ignore'):
+ corr = (sstats.zscore(a, ddof=ddof, nan_policy=nan_policy)
+ * sstats.zscore(b, ddof=ddof, nan_policy=nan_policy))
+
+ sumfunc, n_obs = np.sum, len(a)
+ if nan_policy == 'omit':
+ corr = corr.filled(np.nan)
+ sumfunc = np.nansum
+ n_obs = np.squeeze(np.sum(np.logical_not(np.isnan(corr)), axis=0))
+
+ corr = sumfunc(corr, axis=0) / (n_obs - 1)
+ corr = np.squeeze(np.clip(corr, -1, 1)) / 1
+
+ # taken from scipy.stats
+ ab = (n_obs / 2) - 1
+ prob = 2 * sspecial.betainc(ab, ab, 0.5 * (1 - np.abs(corr)))
+
+ return corr, prob
+
+
+def weighted_pearsonr():
+ """Calculate weighted Pearson correlation coefficient (not yet implemented)."""
+ pass
+
+
+def make_correlated_xy(corr=0.85, size=10000, seed=None, tol=0.001):
+ """
+ Generate random vectors that are correlated to approximately `corr`.
+
+ Parameters
+ ----------
+ corr : [-1, 1] float or (N, N) numpy.ndarray, optional
+ The approximate correlation desired. If a float is provided, two
+ vectors with the specified level of correlation will be generated. If
+ an array is provided, it is assumed to be a symmetrical correlation
+ matrix and ``len(corr)`` vectors with the specified levels of
+ correlation will be generated. Default: 0.85
+ size : int or tuple, optional
+ Desired size of the generated vectors. Default: 10000
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Default: None
+ tol : [0, 1] float, optional
+ Tolerance of correlation between generated `vectors` and specified
+ `corr`. Default: 0.001
+
+ Returns
+ -------
+ vectors : numpy.ndarray
+ Random vectors of size `size` with correlation specified by `corr`
+
+ Examples
+ --------
+ >>> from netneurotools import stats
+
+ By default two vectors are generated with specified correlation
+
+ >>> x, y = stats.make_correlated_xy()
+ >>> np.corrcoef(x, y) # doctest: +SKIP
+ array([[1. , 0.85083661],
+ [0.85083661, 1. ]])
+ >>> x, y = stats.make_correlated_xy(corr=0.2)
+ >>> np.corrcoef(x, y) # doctest: +SKIP
+ array([[1. , 0.20069953],
+ [0.20069953, 1. ]])
+
+ You can also provide correlation matrices to generate more than two vectors
+ if desired. Note that this makes it more difficult to ensure the actual
+ correlations are close to the desired values:
+
+ >>> corr = [[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]]
+ >>> out = stats.make_correlated_xy(corr=corr)
+ >>> out.shape
+ (3, 10000)
+ >>> np.corrcoef(out) # doctest: +SKIP
+ array([[1. , 0.50965273, 0.30235686],
+ [0.50965273, 1. , 0.01089107],
+ [0.30235686, 0.01089107, 1. ]])
+ """
+ rs = check_random_state(seed)
+
+ # no correlations outside [-1, 1] bounds
+ if np.any(np.abs(corr) > 1):
+ raise ValueError('Provided `corr` must (all) be in range [-1, 1].')
+
+ # if we're given a single number, assume two vectors are desired
+ if isinstance(corr, (int, float)):
+ covs = np.ones((2, 2)) * 0.111
+ covs[(0, 1), (1, 0)] *= corr
+ # if we're given a correlation matrix, assume `N` vectors are desired
+ elif isinstance(corr, (list, np.ndarray)):
+ corr = np.asarray(corr)
+ if corr.ndim != 2 or len(corr) != len(corr.T):
+ raise ValueError('If `corr` is a list or array, must be a 2D '
+ 'square array, not {}'.format(corr.shape))
+ if np.any(np.diag(corr) != 1):
+ raise ValueError('Diagonal of `corr` must be 1.')
+ covs = corr * 0.111
+ means = [0] * len(covs)
+
+ # generate the variables
+ count = 0
+ while count < 500:
+ vectors = rs.multivariate_normal(mean=means, cov=covs, size=size).T
+ flat = vectors.reshape(len(vectors), -1)
+ # if diff between actual and desired correlations less than tol, break
+ if np.all(np.abs(np.corrcoef(flat) - (covs / 0.111)) < tol):
+ break
+ count += 1
+
+ return vectors
diff --git a/netneurotools/stats/permutation_test.py b/netneurotools/stats/permutation_test.py
new file mode 100644
index 0000000..9ff4434
--- /dev/null
+++ b/netneurotools/stats/permutation_test.py
@@ -0,0 +1,283 @@
+"""Functions for calculating permutation test."""
+
+import numpy as np
+from sklearn.utils.validation import check_random_state
+
+try: # scipy >= 1.8.0
+ from scipy.stats._stats_py import _chk2_asarray
+except ImportError: # scipy < 1.8.0
+ from scipy.stats.stats import _chk2_asarray
+
+from .correlation import efficient_pearsonr
+
+
+def permtest_1samp(a, popmean, axis=0, n_perm=1000, seed=0):
+ """
+ Non-parametric equivalent of :py:func:`scipy.stats.ttest_1samp`.
+
+ Generates two-tailed p-value for hypothesis of whether `a` differs from
+ `popmean` using permutation tests
+
+ Parameters
+ ----------
+ a : array_like
+ Sample observations
+ popmean : float or array_like
+ Expected value under the null hypothesis. If array_like, it must have the
+ same shape as `a` excluding the `axis` dimension
+ axis : int or None, optional
+ Axis along which to compute test. If None, compute over the whole array
+ of `a`. Default: 0
+ n_perm : int, optional
+ Number of permutations to assess. Unless `a` is very small along `axis`
+ this will approximate a randomization test via Monte Carlo simulations.
+ Default: 1000
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Set to None for "randomness".
+ Default: 0
+
+ Returns
+ -------
+ stat : float or numpy.ndarray
+ Difference from `popmean`
+ pvalue : float or numpy.ndarray
+ Non-parametric p-value
+
+ Notes
+ -----
+ Providing multiple values to `popmean` to run *independent* tests in
+ parallel is not currently supported.
+
+ The lowest p-value that can be returned by this function is equal to 1 /
+ (`n_perm` + 1).
+
+ Examples
+ --------
+ >>> from netneurotools import stats
+ >>> np.random.seed(7654567) # set random seed for reproducible results
+ >>> rvs = np.random.normal(loc=5, scale=10, size=(50, 2))
+
+ Test if mean of random sample is equal to true mean, and different mean. We
+ reject the null hypothesis in the second case and don't reject it in the
+ first case.
+
+ >>> stats.permtest_1samp(rvs, 5.0)
+ (array([-0.985602 , -0.05204969]), array([0.48551449, 0.95904096]))
+ >>> stats.permtest_1samp(rvs, 0.0)
+ (array([4.014398 , 4.94795031]), array([0.00699301, 0.000999 ]))
+
+ Example using axis and non-scalar dimension for population mean
+
+ >>> stats.permtest_1samp(rvs, [5.0, 0.0])
+ (array([-0.985602 , 4.94795031]), array([0.48551449, 0.000999 ]))
+ >>> stats.permtest_1samp(rvs.T, [5.0, 0.0], axis=1)
+ (array([-0.985602 , 4.94795031]), array([0.51548452, 0.000999 ]))
+ """
+ a, popmean, axis = _chk2_asarray(a, popmean, axis)
+ rs = check_random_state(seed)
+
+ if a.size == 0:
+ return np.nan, np.nan
+
+ # ensure popmean will broadcast to `a` correctly
+ if popmean.ndim != a.ndim:
+ popmean = np.expand_dims(popmean, axis=axis)
+
+ # center `a` around `popmean` and calculate original mean
+ zeroed = a - popmean
+ true_mean = zeroed.mean(axis=axis) / 1  # divide by 1 coerces 0-d arrays to float
+ abs_mean = np.abs(true_mean)
+
+ # this for loop is not _the fastest_ but is memory efficient
+ # the broadcasting alt. would mean storing zeroed.size * n_perm in memory
+ permutations = np.ones(true_mean.shape)
+ for _ in range(n_perm):
+ flipped = zeroed * rs.choice([-1, 1], size=zeroed.shape) # sign flip
+ permutations += np.abs(flipped.mean(axis=axis)) >= abs_mean
+
+ pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_mean
+
+ return true_mean, pvals
+
+
+def permtest_rel(a, b, axis=0, n_perm=1000, seed=0):
+ """
+ Non-parametric equivalent of :py:func:`scipy.stats.ttest_rel`.
+
+ Generates two-tailed p-value for hypothesis of whether related samples `a`
+ and `b` differ using permutation tests
+
+ Parameters
+ ----------
+ a, b : array_like
+ Sample observations. These arrays must have the same shape.
+ axis : int or None, optional
+ Axis along which to compute test. If None, compute over whole arrays
+ of `a` and `b`. Default: 0
+ n_perm : int, optional
+ Number of permutations to assess. Unless `a` and `b` are very small
+ along `axis` this will approximate a randomization test via Monte
+ Carlo simulations. Default: 1000
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Set to None for "randomness".
+ Default: 0
+
+ Returns
+ -------
+ stat : float or numpy.ndarray
+ Average difference between `a` and `b`
+ pvalue : float or numpy.ndarray
+ Non-parametric p-value
+
+ Notes
+ -----
+ The lowest p-value that can be returned by this function is equal to 1 /
+ (`n_perm` + 1).
+
+ Examples
+ --------
+ >>> from netneurotools import stats
+
+ >>> np.random.seed(12345678) # set random seed for reproducible results
+ >>> rvs1 = np.random.normal(loc=5, scale=10, size=500)
+ >>> rvs2 = (np.random.normal(loc=5, scale=10, size=500)
+ ... + np.random.normal(scale=0.2, size=500))
+ >>> stats.permtest_rel(rvs1, rvs2) # doctest: +SKIP
+ (-0.16506275161572695, 0.8021978021978022)
+
+ >>> rvs3 = (np.random.normal(loc=8, scale=10, size=500)
+ ... + np.random.normal(scale=0.2, size=500))
+ >>> stats.permtest_rel(rvs1, rvs3) # doctest: +SKIP
+ (2.40533726097883, 0.000999000999000999)
+ """
+ a, b, axis = _chk2_asarray(a, b, axis)
+ rs = check_random_state(seed)
+
+ if a.shape[axis] != b.shape[axis]:
+ raise ValueError('Provided arrays do not have same length along axis')
+
+ if a.size == 0 or b.size == 0:
+ return np.nan, np.nan
+
+ # calculate original difference in means
+ ab = np.stack([a, b], axis=0)
+ if ab.ndim < 3:
+ ab = np.expand_dims(ab, axis=-1)
+ true_diff = np.squeeze(np.diff(ab, axis=0)).mean(axis=axis) / 1  # / 1 coerces 0-d arrays to float
+ abs_true = np.abs(true_diff)
+
+ # idx array
+ reidx = list(np.meshgrid(*[range(f) for f in ab.shape], indexing='ij'))
+
+ permutations = np.ones(true_diff.shape)
+ for _ in range(n_perm):
+ # use this to re-index (i.e., swap along) the first axis of `ab`
+ swap = rs.random_sample(ab.shape[:-1]).argsort(axis=axis)
+ reidx[0] = np.repeat(swap[..., np.newaxis], ab.shape[-1], axis=-1)
+ # recompute difference between `a` and `b` (i.e., first axis of `ab`)
+ pdiff = np.squeeze(np.diff(ab[tuple(reidx)], axis=0)).mean(axis=axis)
+ permutations += np.abs(pdiff) >= abs_true
+
+ pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_diff
+
+ return true_diff, pvals
+
+
+def permtest_pearsonr(a, b, axis=0, n_perm=1000, resamples=None, seed=0):
+ """
+ Non-parametric equivalent of :py:func:`scipy.stats.pearsonr`.
+
+    Generates a two-tailed p-value for the hypothesis of whether samples `a` and
+    `b` are correlated, using permutation tests.
+
+ Parameters
+ ----------
+ a,b : (N[, M]) array_like
+ Sample observations. These arrays must have the same length and either
+ an equivalent number of columns or be broadcastable
+ axis : int or None, optional
+ Axis along which to compute test. If None, compute over whole arrays
+ of `a` and `b`. Default: 0
+ n_perm : int, optional
+ Number of permutations to assess. Unless `a` and `b` are very small
+ along `axis` this will approximate a randomization test via Monte
+ Carlo simulations. Default: 1000
+ resamples : (N, P) array_like, optional
+ Resampling array used to shuffle `a` when generating null distribution
+ of correlations. This array must have the same length as `a` and `b`
+    and should have at least as many columns as `n_perm` (if it has more,
+    only the first `n_perm` columns will be used). When not specified, a
+    standard permutation is used to shuffle `a`. Default: None
+ seed : {int, np.random.RandomState instance, None}, optional
+ Seed for random number generation. Set to None for "randomness".
+ Default: 0
+
+ Returns
+ -------
+    corr : float or numpy.ndarray
+ Correlations
+ pvalue : float or numpy.ndarray
+ Non-parametric p-value
+
+ Notes
+ -----
+ The lowest p-value that can be returned by this function is equal to 1 /
+ (`n_perm` + 1).
+
+ Examples
+ --------
+ >>> from netneurotools import stats
+
+ >>> np.random.seed(12345678) # set random seed for reproducible results
+ >>> x, y = stats.make_correlated_xy(corr=0.1, size=100)
+ >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP
+ (0.10032564626876286, 0.3046953046953047)
+
+ >>> x, y = stats.make_correlated_xy(corr=0.5, size=100)
+ >>> stats.permtest_pearsonr(x, y) # doctest: +SKIP
+ (0.500040365781984, 0.000999000999000999)
+
+    Also works with multiple columns, either by broadcasting the smaller array
+    to the larger one:
+
+ >>> z = x + np.random.normal(loc=1, size=100)
+ >>> stats.permtest_pearsonr(x, np.column_stack([y, z]))
+ (array([0.50004037, 0.25843187]), array([0.000999 , 0.01098901]))
+
+ or by using matching columns in the two arrays (e.g., `x` and `y` vs
+ `a` and `b`):
+
+ >>> a, b = stats.make_correlated_xy(corr=0.9, size=100)
+ >>> stats.permtest_pearsonr(np.column_stack([x, a]), np.column_stack([y, b]))
+ (array([0.50004037, 0.89927523]), array([0.000999, 0.000999]))
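+
+    A precomputed resampling array can also be supplied via `resamples`; the
+    sketch below simply stacks random permutation indices column-wise for
+    illustration:
+
+    >>> spins = np.column_stack([np.random.permutation(100) for _ in range(1000)])
+    >>> stats.permtest_pearsonr(x, y, resamples=spins)  # doctest: +SKIP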
+ """ # noqa
+ a, b, axis = _chk2_asarray(a, b, axis)
+ rs = check_random_state(seed)
+
+ if len(a) != len(b):
+ raise ValueError('Provided arrays do not have same length')
+
+ if a.size == 0 or b.size == 0:
+ return np.nan, np.nan
+
+ if resamples is not None:
+ if n_perm > resamples.shape[-1]:
+ raise ValueError('Number of permutations requested exceeds size '
+ 'of resampling array.')
+
+ # divide by one forces coercion to float if ndim = 0
+ true_corr = efficient_pearsonr(a, b)[0] / 1
+ abs_true = np.abs(true_corr)
+
+ permutations = np.ones(true_corr.shape)
+ for perm in range(n_perm):
+ # permute `a` and determine whether correlations exceed original
+ if resamples is None:
+ ap = a[rs.permutation(len(a))]
+ else:
+ ap = a[resamples[:, perm]]
+ permutations += np.abs(efficient_pearsonr(ap, b)[0]) >= abs_true
+
+ pvals = permutations / (n_perm + 1) # + 1 in denom accounts for true_corr
+
+ return true_corr, pvals
diff --git a/netneurotools/stats/regression.py b/netneurotools/stats/regression.py
new file mode 100644
index 0000000..c6ac6e2
--- /dev/null
+++ b/netneurotools/stats/regression.py
@@ -0,0 +1,256 @@
+"""Functions for calculating regression."""
+
+from itertools import combinations
+
+import numpy as np
+from tqdm import tqdm
+import scipy.stats as sstats
+from joblib import Parallel, delayed
+from sklearn.linear_model import LinearRegression
+from sklearn.utils.validation import check_array
+
+
+def _add_constant(data):
+ """
+ Add a constant (i.e., intercept) term to `data`.
+
+ Parameters
+ ----------
+ data : (N, M) array_like
+ Samples by features data array
+
+ Returns
+ -------
+ data : (N, F) np.ndarray
+ Where `F` is `M + 1`
+
+ Examples
+ --------
+ >>> from netneurotools import stats
+
+ >>> A = np.zeros((5, 5))
+ >>> Ac = stats._add_constant(A)
+ >>> Ac
+ array([[0., 0., 0., 0., 0., 1.],
+ [0., 0., 0., 0., 0., 1.],
+ [0., 0., 0., 0., 0., 1.],
+ [0., 0., 0., 0., 0., 1.],
+ [0., 0., 0., 0., 0., 1.]])
+ """
+ data = check_array(data, ensure_2d=False)
+ return np.column_stack([data, np.ones(len(data))])
+
+
+def residualize(X, Y, Xc=None, Yc=None, normalize=True, add_intercept=True):
+ """
+ Return residuals of regression equation from `Y ~ X`.
+
+ Parameters
+ ----------
+ X : (N[, R]) array_like
+ Coefficient matrix of `R` variables for `N` subjects
+ Y : (N[, F]) array_like
+ Dependent variable matrix of `F` variables for `N` subjects
+ Xc : (M[, R]) array_like, optional
+ Coefficient matrix of `R` variables for `M` subjects. If not specified
+ then `X` is used to estimate betas. Default: None
+ Yc : (M[, F]) array_like, optional
+ Dependent variable matrix of `F` variables for `M` subjects. If not
+ specified then `Y` is used to estimate betas. Default: None
+ normalize : bool, optional
+ Whether to normalize (i.e., z-score) residuals. Will use residuals from
+ `Yc ~ Xc` for generating mean and variance. Default: True
+ add_intercept : bool, optional
+ Whether to add intercept to `X` (and `Xc`, if provided). The intercept
+ will not be removed, just used in beta estimation. Default: True
+
+ Returns
+ -------
+ Yr : (N, F) numpy.ndarray
+ Residuals of `Y ~ X`
+
+ Notes
+ -----
+ If both `Xc` and `Yc` are provided, these are used to calculate betas which
+ are then applied to `X` and `Y`.
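+
+    Examples
+    --------
+    A minimal sketch with random data (array shapes are illustrative only):
+
+    .. code:: python
+
+        import numpy as np
+        from netneurotools.stats import residualize
+
+        X = np.random.rand(100, 2)  # two covariates for 100 subjects
+        Y = np.random.rand(100, 5)  # five dependent variables
+        Yr = residualize(X, Y)      # z-scored residuals of Y ~ X, shape (100, 5)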
+ """
+ if ((Yc is None and Xc is not None) or (Yc is not None and Xc is None)):
+ raise ValueError('If processing against a comparative group, you must '
+ 'provide both `Xc` and `Yc`.')
+
+ X, Y = np.asarray(X), np.asarray(Y)
+
+ if Yc is None:
+ Xc, Yc = X.copy(), Y.copy()
+ else:
+ Xc, Yc = np.asarray(Xc), np.asarray(Yc)
+
+ # add intercept to regressors if requested and calculate fit
+ if add_intercept:
+ X, Xc = _add_constant(X), _add_constant(Xc)
+ betas, *_ = np.linalg.lstsq(Xc, Yc, rcond=None)
+
+ # remove intercept from regressors and betas for calculation of residuals
+ if add_intercept:
+ betas = betas[:-1]
+ X, Xc = X[:, :-1], Xc[:, :-1]
+
+ # calculate residuals
+ Yr = Y - (X @ betas)
+ Ycr = Yc - (Xc @ betas)
+
+ if normalize:
+ Yr = sstats.zmap(Yr, compare=Ycr)
+
+ return Yr
+
+
+def get_dominance_stats(X, y, use_adjusted_r_sq=True, verbose=False, n_jobs=1):
+ """
+ Return the dominance analysis statistics for multilinear regression.
+
+    This is a rewritten and simplified version of [DA1]_. It has been briefly
+    tested against the original package but is still at an early stage.
+    Please feel free to report any bugs.
+
+ Warning: Still work-in-progress. Parameters might change!
+
+ Parameters
+ ----------
+ X : (N, M) array_like
+ Input data
+ y : (N,) array_like
+ Target values
+ use_adjusted_r_sq : bool, optional
+ Whether to use adjusted r squares. Default: True
+ verbose : bool, optional
+ Whether to print debug messages. Default: False
+ n_jobs : int, optional
+ The number of jobs to run in parallel. Default: 1
+
+ Returns
+ -------
+ model_metrics : dict
+ The dominance metrics, currently containing `individual_dominance`,
+ `partial_dominance`, `total_dominance`, and `full_r_sq`.
+ model_r_sq : dict
+ Contains all model r squares
+
+ Notes
+ -----
+ Example usage
+
+ .. code:: python
+
+ from netneurotools.stats import get_dominance_stats
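+        # note: load_boston was removed in scikit-learn 1.2; run this with
+        # an older scikit-learn or substitute another regression dataset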
+ from sklearn.datasets import load_boston
+ X, y = load_boston(return_X_y=True)
+ model_metrics, model_r_sq = get_dominance_stats(X, y)
+
+ To compare with [DA1]_, use `use_adjusted_r_sq=False`
+
+ .. code:: python
+
+ from dominance_analysis import Dominance_Datasets
+ from dominance_analysis import Dominance
+ boston_dataset=Dominance_Datasets.get_boston()
+ dominance_regression=Dominance(data=boston_dataset,
+ target='House_Price',objective=1)
+ incr_variable_rsquare=dominance_regression.incremental_rsquare()
+ dominance_regression.dominance_stats()
+
+ References
+ ----------
+ .. [DA1] https://github.com/dominance-analysis/dominance-analysis
+
+ """
+ # this helps to remove one element from a tuple
+ def remove_ret(tpl, elem):
+ lst = list(tpl)
+ lst.remove(elem)
+ return tuple(lst)
+
+ # sklearn linear regression wrapper
+ def get_reg_r_sq(X, y, use_adjusted_r_sq=True):
+ lin_reg = LinearRegression()
+ lin_reg.fit(X, y)
+ yhat = lin_reg.predict(X)
+ SS_Residual = sum((y - yhat) ** 2)
+ SS_Total = sum((y - np.mean(y)) ** 2)
+ r_squared = 1 - (float(SS_Residual)) / SS_Total
+ adjusted_r_squared = 1 - (1 - r_squared) * \
+ (len(y) - 1) / (len(y) - X.shape[1] - 1)
+ if use_adjusted_r_sq:
+ return adjusted_r_squared
+ else:
+ return r_squared
+
+ # helper function to compute r_sq for a given idx_tuple
+ def compute_r_sq(idx_tuple):
+ return idx_tuple, get_reg_r_sq(X[:, idx_tuple],
+ y,
+ use_adjusted_r_sq=use_adjusted_r_sq)
+
+ # generate all predictor combinations in list (num of predictors) of lists
+ n_predictor = X.shape[-1]
+ # n_comb_len_group = n_predictor - 1
+ predictor_combs = [list(combinations(range(n_predictor), i))
+ for i in range(1, n_predictor + 1)]
+ if verbose:
+ print(f"[Dominance analysis] Generated \
+ {len([v for i in predictor_combs for v in i])} combinations")
+
+ model_r_sq = dict()
+ results = Parallel(n_jobs=n_jobs)(
+ delayed(compute_r_sq)(idx_tuple)
+ for len_group in tqdm(predictor_combs,
+ desc='num-of-predictor loop',
+ disable=not verbose)
+ for idx_tuple in tqdm(len_group,
+ desc='insider loop',
+ disable=not verbose))
+
+ # extract r_sq from results
+ for idx_tuple, r_sq in results:
+ model_r_sq[idx_tuple] = r_sq
+
+ if verbose:
+ print(f"[Dominance analysis] Acquired {len(model_r_sq)} r^2's")
+
+ # getting all model metrics
+ model_metrics = dict([])
+
+ # individual dominance
+ individual_dominance = []
+ for i_pred in range(n_predictor):
+ individual_dominance.append(model_r_sq[(i_pred,)])
+ individual_dominance = np.array(individual_dominance).reshape(1, -1)
+ model_metrics["individual_dominance"] = individual_dominance
+
+ # partial dominance
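+    # (i.e., the average gain in r^2 from adding each predictor to every
+    # subset of a given size)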
+ partial_dominance = [[] for _ in range(n_predictor - 1)]
+ for i_len in range(n_predictor - 1):
+ i_len_combs = list(combinations(range(n_predictor), i_len + 2))
+ for j_node in range(n_predictor):
+ j_node_sel = [v for v in i_len_combs if j_node in v]
+ reduced_list = [remove_ret(comb, j_node) for comb in j_node_sel]
+ diff_values = [
+ model_r_sq[j_node_sel[i]] - model_r_sq[reduced_list[i]]
+ for i in range(len(reduced_list))]
+ partial_dominance[i_len].append(np.mean(diff_values))
+
+ # save partial dominance
+ partial_dominance = np.array(partial_dominance)
+ model_metrics["partial_dominance"] = partial_dominance
+ # get total dominance
+ total_dominance = np.mean(
+ np.r_[individual_dominance, partial_dominance], axis=0)
+ # test and save total dominance
+ assert np.allclose(total_dominance.sum(),
+ model_r_sq[tuple(range(n_predictor))]), \
+ "Sum of total dominance is not equal to full r square!"
+ model_metrics["total_dominance"] = total_dominance
+ # save full r^2
+ model_metrics["full_r_sq"] = model_r_sq[tuple(range(n_predictor))]
+
+ return model_metrics, model_r_sq
diff --git a/netneurotools/stats/stats_utils.py b/netneurotools/stats/stats_utils.py
new file mode 100644
index 0000000..9964f8e
--- /dev/null
+++ b/netneurotools/stats/stats_utils.py
@@ -0,0 +1 @@
+"""Functions for supporting statistics."""
diff --git a/netneurotools/stats/tests/__init__.py b/netneurotools/stats/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/netneurotools/stats/tests/test_correlation.py b/netneurotools/stats/tests/test_correlation.py
new file mode 100644
index 0000000..f5c8653
--- /dev/null
+++ b/netneurotools/stats/tests/test_correlation.py
@@ -0,0 +1,67 @@
+"""For testing netneurotools.stats.correlation functionality."""
+
+import pytest
+import numpy as np
+from netneurotools import stats
+
+
+@pytest.mark.parametrize('x, y, expected', [
+ # basic one-dimensional input
+ (range(5), range(5), (1.0, 0.0)),
+ # broadcasting occurs regardless of input order
+ (np.stack([range(5), range(5, 0, -1)], 1), range(5),
+ ([1.0, -1.0], [0.0, 0.0])),
+ (range(5), np.stack([range(5), range(5, 0, -1)], 1),
+ ([1.0, -1.0], [0.0, 0.0])),
+ # correlation between matching columns
+ (np.stack([range(5), range(5, 0, -1)], 1),
+ np.stack([range(5), range(5, 0, -1)], 1),
+ ([1.0, 1.0], [0.0, 0.0]))
+])
+def test_efficient_pearsonr(x, y, expected):
+ """Test efficient_pearsonr function."""
+ assert np.allclose(stats.efficient_pearsonr(x, y), expected)
+
+
+def test_efficient_pearsonr_errors():
+ """Test efficient_pearsonr function errors."""
+ with pytest.raises(ValueError):
+ stats.efficient_pearsonr(range(4), range(5))
+
+ assert all(np.isnan(a) for a in stats.efficient_pearsonr([], []))
+
+
+@pytest.mark.parametrize('corr, size, tol, seed', [
+ (0.85, (1000,), 0.05, 1234),
+ (0.85, (1000, 1000), 0.05, 1234),
+ ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234)
+])
+def test_make_correlated_xy(corr, size, tol, seed):
+ """Test make_correlated_xy function."""
+ out = stats.make_correlated_xy(corr=corr, size=size,
+ tol=tol, seed=seed)
+ # ensure output is expected shape
+ assert out.shape[1:] == size
+    assert len(out) == (len(corr) if hasattr(corr, '__len__') else 2)
+
+ # check outputs are correlated within specified tolerance
+ realcorr = np.corrcoef(out.reshape(len(out), -1))
+ if len(realcorr) == 2 and not hasattr(corr, '__len__'):
+ realcorr = realcorr[0, 1]
+ assert np.all(np.abs(realcorr - corr) < tol)
+
+ # check that seed generates reproducible values
+ duplicate = stats.make_correlated_xy(corr=corr, size=size,
+ tol=tol, seed=seed)
+ assert np.allclose(out, duplicate)
+
+
+@pytest.mark.parametrize('corr', [
+ (1.5), (-1.5), # outside range of [-1, 1]
+ ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]), # not 2D / square array
+ ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1
+])
+def test_make_correlated_xy_errors(corr):
+ """Test make_correlated_xy function errors."""
+ with pytest.raises(ValueError):
+ stats.make_correlated_xy(corr)
diff --git a/netneurotools/stats/tests/test_permutation.py b/netneurotools/stats/tests/test_permutation.py
new file mode 100644
index 0000000..ae6a335
--- /dev/null
+++ b/netneurotools/stats/tests/test_permutation.py
@@ -0,0 +1,65 @@
+"""For testing netneurotools.stats.permutation_test functionality."""
+
+import pytest
+import numpy as np
+from netneurotools import stats
+
+
+@pytest.mark.xfail
+def test_permtest_1samp():
+ """Test permutation test for one-sample t-test."""
+ assert False
+ # n1, n2, n3 = 10, 15, 20
+ # rs = np.random.RandomState(1234)
+ # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3))
+
+ # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0)
+
+
+def test_permtest_rel():
+ """Test permutation test for paired samples."""
+ dr, pr = -0.0005, 0.4175824175824176
+ dpr = ([dr, -dr], [pr, pr])
+
+ rvs1 = np.linspace(1, 100, 100)
+ rvs2 = np.linspace(1.01, 99.989, 100)
+ rvs1_2D = np.array([rvs1, rvs2])
+ rvs2_2D = np.array([rvs2, rvs1])
+
+ # the p-values in these two cases should be consistent
+ d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234)
+ assert np.allclose([d, p], (dr, pr))
+ d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234)
+ assert np.allclose([d, p], dpr)
+
+ # but the p-value will differ here because of _how_ we're drawing the
+ # random permutations... it would be nice if this was consistent, but as
+ # yet i don't have a great idea on how to make that happen without assuming
+ # a whole lot about the data
+ pr = 0.51248751
+ tpr = ([dr, -dr], [pr, pr])
+ d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234)
+ assert np.allclose([d, p], tpr)
+
+
+def test_permtest_pearsonr():
+ """Test permutation test for Pearson correlation."""
+ np.random.seed(12345678)
+ x, y = stats.make_correlated_xy(corr=0.1, size=100)
+ r, p = stats.permtest_pearsonr(x, y)
+ assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047])
+
+ x, y = stats.make_correlated_xy(corr=0.5, size=100)
+ r, p = stats.permtest_pearsonr(x, y)
+ assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999])
+
+ z = x + np.random.normal(loc=1, size=100)
+ r, p = stats.permtest_pearsonr(x, np.column_stack([y, z]))
+ assert np.allclose(r, np.array([0.50004037, 0.25843187]))
+ assert np.allclose(p, np.array([0.000999, 0.01098901]))
+
+ a, b = stats.make_correlated_xy(corr=0.9, size=100)
+ r, p = stats.permtest_pearsonr(np.column_stack([x, a]),
+ np.column_stack([y, b]))
+ assert np.allclose(r, np.array([0.50004037, 0.89927523]))
+ assert np.allclose(p, np.array([0.000999, 0.000999]))
diff --git a/netneurotools/stats/tests/test_regression.py b/netneurotools/stats/tests/test_regression.py
new file mode 100644
index 0000000..ce20638
--- /dev/null
+++ b/netneurotools/stats/tests/test_regression.py
@@ -0,0 +1,14 @@
+"""For testing netneurotools.stats.regression functionality."""
+
+import numpy as np
+from netneurotools import stats
+
+
+def test_add_constant():
+ """Test adding a constant to a 1D or 2D array."""
+ # if provided a vector it will return a 2D array
+ assert stats._add_constant(np.random.rand(100)).shape == (100, 2)
+
+ # if provided a 2D array it will return the same, extended by 1 column
+ out = stats._add_constant(np.random.rand(100, 100))
+ assert out.shape == (100, 101) and np.all(out[:, -1] == 1)
diff --git a/netneurotools/surface.py b/netneurotools/surface.py
deleted file mode 100644
index e43ca16..0000000
--- a/netneurotools/surface.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""Functions for constructing graphs from surface meshes."""
-
-import numpy as np
-from scipy import sparse
-
-
-def _get_edges(faces):
- """
- Get set of edges from `faces`.
-
- Parameters
- ----------
- faces : (F, 3) array_like
- Set of indices creating triangular faces of a mesh
-
- Returns
- -------
- edges : (F*3, 2) array_like
- All edges in `faces`
- """
- faces = np.asarray(faces)
- edges = np.sort(faces[:, [0, 1, 1, 2, 2, 0]].reshape((-1, 2)), axis=1)
-
- return edges
-
-
-def get_direct_edges(vertices, faces):
- """
- Get (unique) direct edges and weights in mesh describes by inputs.
-
- Parameters
- ----------
- vertices : (N, 3) array_like
- Coordinates of `vertices` comprising mesh with `faces`
- faces : (F, 3) array_like
- Indices of `vertices` that compose triangular faces of mesh
-
- Returns
- -------
- edges : (E, 2) array_like
- Indices of `vertices` comprising direct edges (without duplicates)
- weights : (E, 1) array_like
- Distances between `edges`
-
- """
- edges = np.unique(_get_edges(faces), axis=0)
- weights = np.linalg.norm(np.diff(vertices[edges], axis=1), axis=-1)
- return edges, weights.squeeze()
-
-
-def get_indirect_edges(vertices, faces):
- """
- Get indirect edges and weights in mesh described by inputs.
-
- Indirect edges are between two vertices that participate in faces sharing
- an edge
-
- Parameters
- ----------
- vertices : (N, 3) array_like
- Coordinates of `vertices` comprising mesh with `faces`
- faces : (F, 3) array_like
- Indices of `vertices` that compose triangular faces of mesh
-
- Returns
- -------
- edges : (E, 2) array_like
- Indices of `vertices` comprising indirect edges (without duplicates)
- weights : (E, 1) array_like
- Distances between `edges` on surface
-
- References
- ----------
- https://github.com/mikedh/trimesh (MIT licensed)
-
- """
- # first generate the list of edges for the provided faces and the
- # index for which face the edge is from (which is just the index of the
- # face repeated thrice, since each face generates three direct edges)
- edges = _get_edges(faces)
- edges_face = np.repeat(np.arange(len(faces)), 3)
-
- # every edge appears twice in a watertight surface, so we'll first get the
- # indices for each duplicate edge in `edges` (this should, assuming all
- # goes well, have rows equal to len(edges) // 2)
- order = np.lexsort(edges.T[::-1])
- edges_sorted = edges[order]
- dupe = np.any(edges_sorted[1:] != edges_sorted[:-1], axis=1)
- dupe_idx = np.append(0, np.nonzero(dupe)[0] + 1)
- start_ok = np.diff(np.concatenate((dupe_idx, [len(edges_sorted)]))) == 2
- groups = np.tile(dupe_idx[start_ok].reshape(-1, 1), 2)
- edge_groups = order[groups + np.arange(2)]
-
- # now, get the indices of the faces that participate in these duplicate
- # edges, as well as the edges themselves
- adjacency = edges_face[edge_groups]
- nondegenerate = adjacency[:, 0] != adjacency[:, 1]
- adjacency = np.sort(adjacency[nondegenerate], axis=1)
- adjacency_edges = edges[edge_groups[:, 0][nondegenerate]]
-
- # the non-shared vertex index is the same shape as adjacency, holding
- # vertex indices vs face indices
- indirect_edges = np.zeros(adjacency.shape, dtype=np.int32) - 1
-
- # loop through the two columns of adjacency
- for i, fid in enumerate(adjacency.T):
- # faces from the current column of adjacency
- face = faces[fid]
- # get index of vertex not included in shared edge
- unshared = np.logical_not(np.logical_or(
- face == adjacency_edges[:, 0].reshape(-1, 1),
- face == adjacency_edges[:, 1].reshape(-1, 1)))
- # each row should have one "uncontained" vertex; ignore degenerates
- row_ok = unshared.sum(axis=1) == 1
- unshared[~row_ok, :] = False
- indirect_edges[row_ok, i] = face[unshared]
-
- # get vertex coordinates of triangles pairs with shared edges, ordered
- # such that the non-shared vertex is always _last_ among the trio
- shared = np.sort(face[np.logical_not(unshared)].reshape(-1, 1, 2), axis=-1)
- shared = np.repeat(shared, 2, axis=1)
- triangles = np.concatenate((shared, indirect_edges[..., None]), axis=-1)
- # `A.shape`: (3, N, 2) corresponding to (xyz coords, edges, triangle pairs)
- A, B, V = vertices[triangles].transpose(2, 3, 0, 1)
-
- # calculate the xyz coordinates of the foot of each triangle, where the
- # base is the shared edge
- # that is, we're trying to calculate F in the equation `VF = VB - (w * BA)`
- # where `VF`, `VB`, and `BA` are vectors, and `w = (AB * VB) / (AB ** 2)`
- w = (np.sum((A - B) * (V - B), axis=0, keepdims=True)
- / np.sum((A - B) ** 2, axis=0, keepdims=True))
- feet = B - (w * (B - A))
- # calculate coordinates of midpoint b/w the feet of each pair of triangles
- midpoints = (np.sum(feet.transpose(1, 2, 0), axis=1) / 2)[:, None]
- # calculate Euclidean distance between non-shared vertices and midpoints
- # and add distances together for each pair of triangles
- norms = np.linalg.norm(vertices[indirect_edges] - midpoints, axis=-1)
- weights = np.sum(norms, axis=-1)
-
- # NOTE: weights won't be perfectly accurate for a small subset of triangle
- # pairs where either triangle has angle >90 along the shared edge. in these
- # the midpoint lies _outside_ the shared edge, so neighboring triangles
- # would need to be taken into account. that said, this occurs in only a
- # minority of cases and the difference tends to be in the ~0.001 mm range
- return indirect_edges, weights
-
-
-def make_surf_graph(vertices, faces, mask=None):
- """
- Construct adjacency graph from `surf`.
-
- Parameters
- ----------
- vertices : (N, 3) array_like
- Coordinates of `vertices` comprising mesh with `faces`
- faces : (F, 3) array_like
- Indices of `vertices` that compose triangular faces of mesh
- mask : (N,) array_like, optional (default None)
- Boolean mask indicating which vertices should be removed from generated
- graph. If not supplied, all vertices are used.
-
- Returns
- -------
- graph : scipy.sparse.csr_matrix
- Sparse matrix representing graph of `vertices` and `faces`
-
- Raises
- ------
- ValueError : inconsistent number of vertices in `mask` and `vertices`
- """
- if mask is not None and len(mask) != len(vertices):
- raise ValueError('Supplied `mask` array has different number of '
- 'vertices than supplied `vertices`.')
-
- # get all (direct + indirect) edges from surface
- direct_edges, direct_weights = get_direct_edges(vertices, faces)
- indirect_edges, indirect_weights = get_indirect_edges(vertices, faces)
- edges = np.vstack((direct_edges, indirect_edges))
- weights = np.hstack((direct_weights, indirect_weights))
-
- # remove edges that include a vertex in `mask`
- if mask is not None:
- idx, = np.where(mask)
- mask = ~np.any(np.isin(edges, idx), axis=1)
- edges, weights = edges[mask], weights[mask]
-
- # construct our graph on which to calculate shortest paths
- return sparse.csr_matrix((np.squeeze(weights), (edges[:, 0], edges[:, 1])),
- shape=(len(vertices), len(vertices)))
diff --git a/netneurotools/tests/test_civet.py b/netneurotools/tests/test_civet.py
deleted file mode 100644
index 9a6a1ab..0000000
--- a/netneurotools/tests/test_civet.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.civet functionality."""
-
-import numpy as np
-import pytest
-
-from netneurotools import civet, datasets
-
-
-@pytest.fixture(scope='module')
-def civet_surf(tmp_path_factory):
- tmpdir = str(tmp_path_factory.getbasetemp())
- return datasets.fetch_civet(data_dir=tmpdir, verbose=0)['mid']
-
-
-def test_read_civet(civet_surf):
- vertices, triangles = civet.read_civet(civet_surf.lh)
- assert len(vertices) == 40962
- assert len(triangles) == 81920
- assert np.all(triangles.max(axis=0) < vertices.shape[0])
-
-
-def test_civet_to_freesurfer():
- brainmap = np.random.rand(81924)
- out = civet.civet_to_freesurfer(brainmap)
- out2 = civet.civet_to_freesurfer(brainmap, method='linear')
- assert out.shape[0] == out2.shape[0] == 81924
-
- with pytest.raises(ValueError):
- civet.civet_to_freesurfer(np.random.rand(10))
diff --git a/netneurotools/tests/test_datasets.py b/netneurotools/tests/test_datasets.py
deleted file mode 100644
index 0a5af12..0000000
--- a/netneurotools/tests/test_datasets.py
+++ /dev/null
@@ -1,204 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.datasets functionality."""
-
-import os
-
-import numpy as np
-import pytest
-
-from netneurotools import datasets
-from netneurotools.datasets import utils
-
-
-@pytest.mark.parametrize('corr, size, tol, seed', [
- (0.85, (1000,), 0.05, 1234),
- (0.85, (1000, 1000), 0.05, 1234),
- ([[1, 0.5, 0.3], [0.5, 1, 0], [0.3, 0, 1]], (1000,), 0.05, 1234)
-])
-def test_make_correlated_xy(corr, size, tol, seed):
- out = datasets.make_correlated_xy(corr=corr, size=size,
- tol=tol, seed=seed)
- # ensure output is expected shape
- assert out.shape[1:] == size
- assert len(out) == len(corr) if hasattr(corr, '__len__') else 2
-
- # check outputs are correlated within specified tolerance
- realcorr = np.corrcoef(out.reshape(len(out), -1))
- if len(realcorr) == 2 and not hasattr(corr, '__len__'):
- realcorr = realcorr[0, 1]
- assert np.all(np.abs(realcorr - corr) < tol)
-
- # check that seed generates reproducible values
- duplicate = datasets.make_correlated_xy(corr=corr, size=size,
- tol=tol, seed=seed)
- assert np.allclose(out, duplicate)
-
-
-@pytest.mark.parametrize('corr', [
- (1.5), (-1.5), # outside range of [-1, 1]
- ([0.85]), ([[0.5, 0.5, 0.5], [0.5, 0.5, 0.5]]), # not 2D / square array
- ([[0.85]]), ([[1, 0.5], [0.5, 0.5]]) # diagonal not equal to 1
-])
-def test_make_correlated_xy_errors(corr):
- with pytest.raises(ValueError):
- datasets.make_correlated_xy(corr)
-
-
-def test_fetch_conte69(tmpdir):
- conte = datasets.fetch_conte69(data_dir=tmpdir, verbose=0)
- assert all(hasattr(conte, k) for k in
- ['midthickness', 'inflated', 'vinflated', 'info'])
-
-
-def test_fetch_yerkes19(tmpdir):
- conte = datasets.fetch_yerkes19(data_dir=tmpdir, verbose=0)
- assert all(hasattr(conte, k) for k in
- ['midthickness', 'inflated', 'vinflated'])
-
-
-def test_fetch_pauli2018(tmpdir):
- pauli = datasets.fetch_pauli2018(data_dir=tmpdir, verbose=0)
- assert all(hasattr(pauli, k) and os.path.isfile(pauli[k]) for k in
- ['probabilistic', 'deterministic', 'info'])
-
-
-@pytest.mark.parametrize('version', [
- 'fsaverage', 'fsaverage3', 'fsaverage4', 'fsaverage5', 'fsaverage6'
-])
-def test_fetch_fsaverage(tmpdir, version):
- fsaverage = datasets.fetch_fsaverage(version=version, data_dir=tmpdir,
- verbose=0)
- assert all(hasattr(fsaverage, k)
- and len(fsaverage[k]) == 2
- and all(os.path.isfile(hemi)
- for hemi in fsaverage[k]) for k in
- ['orig', 'white', 'smoothwm', 'pial', 'inflated', 'sphere'])
-
-
-@pytest.mark.parametrize('version, expected', [
- ('MNI152NLin2009aSym', [1, 1, 1, 1, 1]),
- ('fsaverage', [2, 2, 2, 2, 2]),
- ('fsaverage5', [2, 2, 2, 2, 2]),
- ('fsaverage6', [2, 2, 2, 2, 2]),
- ('fslr32k', [2, 2, 2, 2, 2]),
- ('gcs', [2, 2, 2, 2, 6])
-])
-def test_fetch_cammoun2012(tmpdir, version, expected):
- keys = ['scale033', 'scale060', 'scale125', 'scale250', 'scale500']
- cammoun = datasets.fetch_cammoun2012(version, data_dir=tmpdir, verbose=0)
-
- # output has expected keys
- assert all(hasattr(cammoun, k) for k in keys)
- # and keys are expected lengths!
- for k, e in zip(keys, expected):
- out = getattr(cammoun, k)
- if isinstance(out, (tuple, list)):
- assert len(out) == e
- else:
- assert isinstance(out, str) and out.endswith('.nii.gz')
-
- if 'fsaverage' in version:
- with pytest.warns(DeprecationWarning):
- datasets.fetch_cammoun2012('surface', data_dir=tmpdir, verbose=0)
-
-
-@pytest.mark.parametrize('dataset, expected', [
- ('celegans', ['conn', 'dist', 'labels', 'ref']),
- ('drosophila', ['conn', 'coords', 'labels', 'networks', 'ref']),
- ('human_func_scale033', ['conn', 'coords', 'labels', 'ref']),
- ('human_func_scale060', ['conn', 'coords', 'labels', 'ref']),
- ('human_func_scale125', ['conn', 'coords', 'labels', 'ref']),
- ('human_func_scale250', ['conn', 'coords', 'labels', 'ref']),
- ('human_func_scale500', ['conn', 'coords', 'labels', 'ref']),
- ('human_struct_scale033', ['conn', 'coords', 'dist', 'labels', 'ref']),
- ('human_struct_scale060', ['conn', 'coords', 'dist', 'labels', 'ref']),
- ('human_struct_scale125', ['conn', 'coords', 'dist', 'labels', 'ref']),
- ('human_struct_scale250', ['conn', 'coords', 'dist', 'labels', 'ref']),
- ('human_struct_scale500', ['conn', 'coords', 'dist', 'labels', 'ref']),
- ('macaque_markov', ['conn', 'dist', 'labels', 'ref']),
- ('macaque_modha', ['conn', 'coords', 'dist', 'labels', 'ref']),
- ('mouse', ['acronyms', 'conn', 'coords', 'dist', 'labels', 'ref']),
- ('rat', ['conn', 'labels', 'ref']),
-])
-def test_fetch_connectome(tmpdir, dataset, expected):
- connectome = datasets.fetch_connectome(dataset, data_dir=tmpdir, verbose=0)
-
- for key in expected:
- assert (key in connectome)
- assert isinstance(connectome[key], str if key == 'ref' else np.ndarray)
-
-
-@pytest.mark.parametrize('version', [
- 'fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k'
-])
-def test_fetch_schaefer2018(tmpdir, version):
- keys = [
- '{}Parcels{}Networks'.format(p, n)
- for p in range(100, 1001, 100) for n in [7, 17]
- ]
- schaefer = datasets.fetch_schaefer2018(version, data_dir=tmpdir, verbose=0)
-
- if version == 'fslr32k':
- assert all(k in schaefer and os.path.isfile(schaefer[k]) for k in keys)
- else:
- assert all(k in schaefer
- and len(schaefer[k]) == 2
- and all(os.path.isfile(hemi) for hemi in schaefer[k])
- for k in keys)
-
-
-def test_fetch_hcp_standards(tmpdir):
- hcp = datasets.fetch_hcp_standards(data_dir=tmpdir, verbose=0)
- assert os.path.isdir(hcp)
-
-
-def test_fetch_mmpall(tmpdir):
- mmp = datasets.fetch_mmpall(data_dir=tmpdir, verbose=0)
- assert len(mmp) == 2
- assert all(os.path.isfile(hemi) for hemi in mmp)
- assert all(hasattr(mmp, attr) for attr in ('lh', 'rh'))
-
-
-def test_fetch_voneconomo(tmpdir):
- vek = datasets.fetch_voneconomo(data_dir=tmpdir, verbose=0)
- assert all(hasattr(vek, k) and len(vek[k]) == 2 for k in ['gcs', 'ctab'])
- assert isinstance(vek.get('info'), str)
-
-
-@pytest.mark.parametrize('dset, expected', [
- ('atl-cammoun2012', ['fsaverage', 'fsaverage5', 'fsaverage6', 'fslr32k',
- 'MNI152NLin2009aSym', 'gcs']),
- ('tpl-conte69', ['url', 'md5']),
- ('atl-pauli2018', ['url', 'md5', 'name']),
- ('tpl-fsaverage', ['fsaverage' + f for f in ['', '3', '4', '5', '6']]),
- ('atl-schaefer2018', ['fsaverage', 'fsaverage6', 'fsaverage6'])
-])
-def test_get_dataset_info(dset, expected):
- info = utils._get_dataset_info(dset)
- if isinstance(info, dict):
- assert all(k in info.keys() for k in expected)
- elif isinstance(info, list):
- for f in info:
- assert all(k in f.keys() for k in expected)
- else:
- assert False
-
- with pytest.raises(KeyError):
- utils._get_dataset_info('notvalid')
-
-
-@pytest.mark.parametrize('version', [
- 'v1', 'v2'
-])
-def test_fetch_civet(tmpdir, version):
- civet = datasets.fetch_civet(version=version, data_dir=tmpdir, verbose=0)
- for key in ('mid', 'white'):
- assert key in civet
- for hemi in ('lh', 'rh'):
- assert hasattr(civet[key], hemi)
- assert os.path.isfile(getattr(civet[key], hemi))
-
-
-def test_get_data_dir(tmpdir):
- data_dir = utils._get_data_dir(tmpdir)
- assert os.path.isdir(data_dir)
diff --git a/netneurotools/tests/test_freesurfer.py b/netneurotools/tests/test_freesurfer.py
deleted file mode 100644
index 53bf74e..0000000
--- a/netneurotools/tests/test_freesurfer.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.freesurfer functionality."""
-
-import numpy as np
-import pytest
-
-from netneurotools import datasets, freesurfer
-
-
-@pytest.fixture(scope='module')
-def cammoun_surf(tmp_path_factory):
- tmpdir = str(tmp_path_factory.getbasetemp())
- return datasets.fetch_cammoun2012('fsaverage5', data_dir=tmpdir, verbose=0)
-
-
-@pytest.mark.parametrize('method', [
- 'average', 'surface', 'geodesic'
-])
-@pytest.mark.parametrize('scale, parcels, n_right', [
- ('scale033', 68, 34),
- ('scale060', 114, 57),
- ('scale125', 219, 108),
- ('scale250', 448, 223),
- ('scale500', 1000, 501),
-])
-def test_find_parcel_centroids(cammoun_surf, scale, parcels, n_right, method):
- lh, rh = cammoun_surf[scale]
-
- coords, hemi = freesurfer.find_parcel_centroids(lhannot=lh, rhannot=rh,
- method=method,
- version='fsaverage5')
- assert len(coords) == parcels
- assert len(hemi) == parcels
- assert np.sum(hemi) == n_right
-
-
-@pytest.mark.parametrize('scale, parcels', [
- ('scale033', 68),
- ('scale060', 114),
- ('scale125', 219),
- ('scale250', 448),
- ('scale500', 1000),
-])
-def test_project_reduce_vertices(cammoun_surf, scale, parcels):
- # these functions are partners and should be tested in concert.
- # we can test all the normal functionality and also ensure that "round
- # trips" work as expected
-
- # generate "parcellated" data
- data = np.random.rand(parcels)
- lh, rh = cammoun_surf[scale]
-
- # do we get the expected number of vertices in our projection?
- projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh)
- assert len(projected) == 20484
-
- # does reduction return our input data, as expected?
- reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh)
- assert np.allclose(data, reduced)
-
- # can we do this with multi-dimensional data, too?
- data = np.random.rand(parcels, 2)
- projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh)
- assert projected.shape == (20484, 2)
- reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh)
- assert np.allclose(data, reduced)
-
- # what about int arrays as input?
- data = np.random.choice(10, size=parcels)
- projected = freesurfer.parcels_to_vertices(data, rhannot=rh, lhannot=lh)
- reduced = freesurfer.vertices_to_parcels(projected, rhannot=rh, lhannot=lh)
- assert np.allclose(reduced, data)
-
- # number of parcels != annotation spec
- with pytest.raises(ValueError):
- freesurfer.parcels_to_vertices(np.random.rand(parcels + 1),
- rhannot=rh, lhannot=lh)
-
- # number of vertices != annotation spec
- with pytest.raises(ValueError):
- freesurfer.vertices_to_parcels(np.random.rand(20485),
- rhannot=rh, lhannot=lh)
diff --git a/netneurotools/tests/test_modularity.py b/netneurotools/tests/test_modularity.py
deleted file mode 100644
index 4018ce0..0000000
--- a/netneurotools/tests/test_modularity.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.modularity functionality."""
-
-import numpy as np
-
-from netneurotools import modularity
-
-rs = np.random.RandomState(1234)
-
-
-def test_dummyvar():
- # generate small example dummy variable code
- out = modularity._dummyvar(np.array([1, 1, 2, 3, 3]))
- assert np.all(out == np.array([[1, 0, 0],
- [1, 0, 0],
- [0, 1, 0],
- [0, 0, 1],
- [0, 0, 1]]))
-
- allones = np.array([1, 1, 1, 1, 1, 1, 1, 1])
- assert np.all(modularity._dummyvar(allones) == allones)
-
-
-def test_zrand():
- # make the same two-group community assignments (with different labels)
- label = np.ones((100, 1))
- X, Y = np.vstack((label, label * 2)), np.vstack((label * 2, label))
- # compare
- assert modularity.zrand(X, Y) == modularity.zrand(X, Y[::-1])
- random = rs.choice([0, 1], size=X.shape)
- assert modularity.zrand(X, Y) > modularity.zrand(X, random)
- assert modularity.zrand(X, Y) == modularity.zrand(X[:, 0], Y[:, 0])
-
-
-def test_zrand_partitions():
- # make random communities
- comm = rs.choice(range(6), size=(10, 100))
- all_diff = modularity._zrand_partitions(comm)
- all_same = modularity._zrand_partitions(np.repeat(comm[:, [0]], 10, axis=1))
-
- # partition of labels that are all the same should have higher average
- # zrand and lower stdev zrand
- assert np.nanmean(all_same) > np.nanmean(all_diff)
- assert np.nanstd(all_same) < np.nanstd(all_diff)
diff --git a/netneurotools/tests/test_plotting.py b/netneurotools/tests/test_plotting.py
deleted file mode 100644
index de35811..0000000
--- a/netneurotools/tests/test_plotting.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.plotting functionality."""
-
-import matplotlib.pyplot as plt
-import numpy as np
-
-from netneurotools import datasets, plotting
-import pytest
-
-
-def test_grid_communities():
- comms = np.asarray([0, 0, 0, 0, 1, 1, 1, 1, 2, 2])
- # check that comms with / without 0 community label yields same output
- assert np.allclose(plotting._grid_communities(comms), [0, 4, 8, 10])
- assert np.allclose(plotting._grid_communities(comms + 1), [0, 4, 8, 10])
-
-
-def test_sort_communities():
- data = np.arange(9).reshape(3, 3)
- comms = np.asarray([0, 0, 2])
- # check that comms with / without 0 community label yields same output
- assert np.allclose(plotting.sort_communities(data, comms), [1, 0, 2])
- assert np.allclose(plotting.sort_communities(data, comms + 1), [1, 0, 2])
-
-
-def test_plot_mod_heatmap():
- data = np.random.rand(100, 100)
- comms = np.random.choice(4, size=(100,))
- ax = plotting.plot_mod_heatmap(data, comms)
- assert isinstance(ax, plt.Axes)
-
-
-@pytest.mark.filterwarnings('ignore')
-def test_plot_fsvertex():
- surfer = pytest.importorskip('surfer')
-
- data = np.random.rand(20484)
- brain = plotting.plot_fsvertex(data, subject_id='fsaverage5',
- offscreen=True)
- assert isinstance(brain, surfer.Brain)
-
-
-@pytest.mark.filterwarnings('ignore')
-def test_plot_fsaverage():
- surfer = pytest.importorskip('surfer')
-
- data = np.random.rand(68)
- lhannot, rhannot = datasets.fetch_cammoun2012('fsaverage5')['scale033']
- brain = plotting.plot_fsaverage(data, lhannot=lhannot, rhannot=rhannot,
- subject_id='fsaverage5', offscreen=True)
- assert isinstance(brain, surfer.Brain)
-
-
-def test_plot_point_brain():
- data = np.random.rand(100)
- coords = np.random.rand(100, 3)
- out = plotting.plot_point_brain(data, coords)
- assert isinstance(out, plt.Figure)
diff --git a/netneurotools/tests/test_stats.py b/netneurotools/tests/test_stats.py
deleted file mode 100644
index 8730bfc..0000000
--- a/netneurotools/tests/test_stats.py
+++ /dev/null
@@ -1,172 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.stats functionality."""
-
-import itertools
-import numpy as np
-import pytest
-
-from netneurotools import datasets, stats
-
-
-@pytest.mark.xfail
-def test_permtest_1samp():
- assert False
- # n1, n2, n3 = 10, 15, 20
- # rs = np.random.RandomState(1234)
- # rvn1 = rs.normal(loc=8, scale=10, size=(n1, n2, n3))
-
- # t1, p1 = stats.permtest_1samp(rvn1, 1, axis=0)
-
-
-def test_permtest_rel():
- dr, pr = -0.0005, 0.4175824175824176
- dpr = ([dr, -dr], [pr, pr])
-
- rvs1 = np.linspace(1, 100, 100)
- rvs2 = np.linspace(1.01, 99.989, 100)
- rvs1_2D = np.array([rvs1, rvs2])
- rvs2_2D = np.array([rvs2, rvs1])
-
- # the p-values in these two cases should be consistent
- d, p = stats.permtest_rel(rvs1, rvs2, axis=0, seed=1234)
- assert np.allclose([d, p], (dr, pr))
- d, p = stats.permtest_rel(rvs1_2D.T, rvs2_2D.T, axis=0, seed=1234)
- assert np.allclose([d, p], dpr)
-
- # but the p-value will differ here because of _how_ we're drawing the
- # random permutations... it would be nice if this was consistent, but as
- # yet i don't have a great idea on how to make that happen without assuming
- # a whole lot about the data
- pr = 0.51248751
- tpr = ([dr, -dr], [pr, pr])
- d, p = stats.permtest_rel(rvs1_2D, rvs2_2D, axis=1, seed=1234)
- assert np.allclose([d, p], tpr)
-
-
-def test_permtest_pearsonr():
- np.random.seed(12345678)
- x, y = datasets.make_correlated_xy(corr=0.1, size=100)
- r, p = stats.permtest_pearsonr(x, y)
- assert np.allclose([r, p], [0.10032564626876286, 0.3046953046953047])
-
- x, y = datasets.make_correlated_xy(corr=0.5, size=100)
- r, p = stats.permtest_pearsonr(x, y)
- assert np.allclose([r, p], [0.500040365781984, 0.000999000999000999])
-
- z = x + np.random.normal(loc=1, size=100)
- r, p = stats.permtest_pearsonr(x, np.column_stack([y, z]))
- assert np.allclose(r, np.array([0.50004037, 0.25843187]))
- assert np.allclose(p, np.array([0.000999, 0.01098901]))
-
- a, b = datasets.make_correlated_xy(corr=0.9, size=100)
- r, p = stats.permtest_pearsonr(np.column_stack([x, a]),
- np.column_stack([y, b]))
- assert np.allclose(r, np.array([0.50004037, 0.89927523]))
- assert np.allclose(p, np.array([0.000999, 0.000999]))
-
-
-@pytest.mark.parametrize('x, y, expected', [
- # basic one-dimensional input
- (range(5), range(5), (1.0, 0.0)),
- # broadcasting occurs regardless of input order
- (np.stack([range(5), range(5, 0, -1)], 1), range(5),
- ([1.0, -1.0], [0.0, 0.0])),
- (range(5), np.stack([range(5), range(5, 0, -1)], 1),
- ([1.0, -1.0], [0.0, 0.0])),
- # correlation between matching columns
- (np.stack([range(5), range(5, 0, -1)], 1),
- np.stack([range(5), range(5, 0, -1)], 1),
- ([1.0, 1.0], [0.0, 0.0]))
-])
-def test_efficient_pearsonr(x, y, expected):
- assert np.allclose(stats.efficient_pearsonr(x, y), expected)
-
-
-def test_efficient_pearsonr_errors():
- with pytest.raises(ValueError):
- stats.efficient_pearsonr(range(4), range(5))
-
- assert all(np.isnan(a) for a in stats.efficient_pearsonr([], []))
-
-
-def test_gen_rotation():
- # make a few rotations (some same / different)
- rout1, lout1 = stats._gen_rotation(seed=1234)
- rout2, lout2 = stats._gen_rotation(seed=1234)
- rout3, lout3 = stats._gen_rotation(seed=5678)
-
- # confirm consistency with the same seed
- assert np.allclose(rout1, rout2) and np.allclose(lout1, lout2)
-
- # confirm inconsistency with different seeds
- assert not np.allclose(rout1, rout3) and not np.allclose(lout1, lout3)
-
- # confirm reflection across L/R hemispheres as expected
- # also confirm min/max never exceeds -1/1
- reflected = np.array([[1, -1, -1], [-1, 1, 1], [-1, 1, 1]])
- for r, l in zip([rout1, rout3], [lout1, lout3]): # noqa: E741
- assert np.allclose(r / l, reflected)
- assert r.max() < 1 and r.min() > -1 and l.max() < 1 and l.min() > -1
-
-
-def _get_sphere_coords(s, t, r=1):
- """Get coordinates at angles `s` and `t` a sphere of radius `r`."""
- # convert to radians
- rad = np.pi / 180
- s, t = s * rad, t * rad
-
- # calculate new points
- x = r * np.cos(s) * np.sin(t)
- y = r * np.sin(s) * np.cos(t)
- z = r * np.cos(t)
-
- return x, y, z
-
-
-def test_gen_spinsamples():
- # grab a few points from a spherical surface and duplicate it for the
- # "other hemisphere"
- coords = [_get_sphere_coords(s, t, r=1) for s, t in
- itertools.product(range(0, 360, 45), range(0, 360, 45))]
- coords = np.vstack([coords, coords])
- hemi = np.hstack([np.zeros(len(coords) // 2), np.ones(len(coords) // 2)])
-
- # generate "normal" test spins
- spins, cost = stats.gen_spinsamples(coords, hemi, n_rotate=10, seed=1234,
- return_cost=True)
- assert spins.shape == spins.shape == (len(coords), 10)
-
- # confirm that `method` parameter functions as desired
- for method in ['vasa', 'hungarian']:
- spin_exact, cost_exact = stats.gen_spinsamples(coords, hemi,
- n_rotate=10, seed=1234,
- method=method,
- return_cost=True)
- assert spin_exact.shape == cost.shape == (len(coords), 10)
- for s in spin_exact.T:
- assert len(np.unique(s)) == len(s)
-
- # check that one hemisphere works
- mask = hemi == 0
- spins, cost = stats.gen_spinsamples(coords[mask], hemi[mask], n_rotate=10,
- seed=1234, return_cost=True)
- assert spins.shape == cost.shape == (len(coords[mask]), 10)
-
- # confirm that check_duplicates will raise warnings
- # since spins aren't exact permutations we need to use 4C4 with repeats
- # and then perform one more rotation than that number (i.e., 35 + 1)
- with pytest.warns(UserWarning):
- i = [0, 1, -2, -1] # only grab a few coordinates
- stats.gen_spinsamples(coords[i], hemi[i], n_rotate=36, seed=1234)
-
- # non-3D coords
- with pytest.raises(ValueError):
- stats.gen_spinsamples(coords[:, :2], hemi)
-
- # non-1D hemi
- with pytest.raises(ValueError):
- stats.gen_spinsamples(coords, np.column_stack([hemi, hemi]))
-
- # different length coords and hemi
- with pytest.raises(ValueError):
- stats.gen_spinsamples(coords, hemi[:-1])
diff --git a/netneurotools/tests/test_utils.py b/netneurotools/tests/test_utils.py
deleted file mode 100644
index 1ac6b91..0000000
--- a/netneurotools/tests/test_utils.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# -*- coding: utf-8 -*-
-"""For testing netneurotools.utils functionality."""
-
-import numpy as np
-import pytest
-
-from netneurotools import datasets, utils
-
-
-def test_add_constant():
- # if provided a vector it will return a 2D array
- assert utils.add_constant(np.random.rand(100)).shape == (100, 2)
-
- # if provided a 2D array it will return the same, extended by 1 column
- out = utils.add_constant(np.random.rand(100, 100))
- assert out.shape == (100, 101) and np.all(out[:, -1] == 1)
-
-
-def test_add_triu():
- arr = np.arange(9).reshape(3, 3)
- assert np.all(utils.get_triu(arr) == np.array([1, 2, 5]))
- assert np.all(utils.get_triu(arr, k=0) == np.array([0, 1, 2, 4, 5, 8]))
-
-
-@pytest.mark.parametrize('scale, expected', [
- ('scale033', 83),
- ('scale060', 129),
- ('scale125', 234),
- ('scale250', 463),
- ('scale500', 1015)
-])
-def test_get_centroids(tmpdir, scale, expected):
- # fetch test dataset
- cammoun = datasets.fetch_cammoun2012('MNI152NLin2009aSym', data_dir=tmpdir,
- verbose=0)
-
- ijk = utils.get_centroids(cammoun[scale])
- xyz = utils.get_centroids(cammoun[scale], image_space=True)
-
- # we get expected shape regardless of requested coordinate space
- assert ijk.shape == xyz.shape == (expected, 3)
- # ijk is all positive (i.e., cartesian) coordinates
- assert np.all(ijk > 0)
-
- # requesting specific labels gives us a subset of the full `ijk`
- lim = utils.get_centroids(cammoun[scale], labels=[1, 2, 3])
- assert np.all(lim == ijk[:3])
diff --git a/netneurotools/utils.py b/netneurotools/utils.py
deleted file mode 100644
index 2d8839e..0000000
--- a/netneurotools/utils.py
+++ /dev/null
@@ -1,243 +0,0 @@
-# -*- coding: utf-8 -*-
-"""Miscellaneous functions of various utility."""
-
-import glob
-import os
-import subprocess
-
-import nibabel as nib
-import numpy as np
-from scipy import ndimage
-from sklearn.utils.validation import check_array
-
-
-def add_constant(data):
- """
- Add a constant (i.e., intercept) term to `data`.
-
- Parameters
- ----------
- data : (N, M) array_like
- Samples by features data array
-
- Returns
- -------
- data : (N, F) np.ndarray
- Where `F` is `M + 1`
-
- Examples
- --------
- >>> from netneurotools import utils
-
- >>> A = np.zeros((5, 5))
- >>> Ac = utils.add_constant(A)
- >>> Ac
- array([[0., 0., 0., 0., 0., 1.],
- [0., 0., 0., 0., 0., 1.],
- [0., 0., 0., 0., 0., 1.],
- [0., 0., 0., 0., 0., 1.],
- [0., 0., 0., 0., 0., 1.]])
- """
- data = check_array(data, ensure_2d=False)
- return np.column_stack([data, np.ones(len(data))])
-
-
-def get_triu(data, k=1):
- """
- Return vectorized version of upper triangle from `data`.
-
- Parameters
- ----------
- data : (N, N) array_like
- Input data
- k : int, optional
- Which diagonal to select from (where primary diagonal is 0). Default: 1
-
- Returns
- -------
- triu : (N * N-1 / 2) numpy.ndarray
- Upper triangle of `data`
-
- Examples
- --------
- >>> from netneurotools import utils
-
- >>> X = np.array([[1, 0.5, 0.25], [0.5, 1, 0.33], [0.25, 0.33, 1]])
- >>> tri = utils.get_triu(X)
- >>> tri
- array([0.5 , 0.25, 0.33])
- """
- return data[np.triu_indices(len(data), k=k)].copy()
-
-
-def globpath(*args):
- """
- Join `args` with :py:func:`os.path.join` and returns sorted glob output.
-
- Parameters
- ----------
- args : str
- Paths / `glob`-compatible regex strings
-
- Returns
- -------
- files : list
- Sorted list of files
- """
- return sorted(glob.glob(os.path.join(*args)))
-
-
-def rescale(data, low=0, high=1):
- """
- Rescale `data` so it is within [`low`, `high`].
-
- Parameters
- ----------
- data : array_like
- Input data array
- low : float, optional
- Lower bound for rescaling. Default: -1
- high : float, optional
- Upper bound for rescaling. Default: 1
-
- Returns
- -------
- rescaled : np.ndarray
- Rescaled data
- """
- data = np.asarray(data)
- rescaled = np.interp(data, (data.min(), data.max()), (low, high))
-
- return rescaled
-
-
-def run(cmd, env=None, return_proc=False, quiet=False):
- """
- Run `cmd` via shell subprocess with provided environment `env`.
-
- Parameters
- ----------
- cmd : str
- Command to be run as single string
- env : dict, optional
- If provided, dictionary of key-value pairs to be added to base
- environment when running `cmd`. Default: None
- return_proc : bool, optional
- Whether to return CompletedProcess object. Default: false
- quiet : bool, optional
- Whether to suppress stdout/stderr from subprocess. Default: False
-
- Returns
- -------
- proc : subprocess.CompletedProcess
- Process output
-
- Raises
- ------
- subprocess.CalledProcessError
- If subprocess does not exit cleanly
-
- Examples
- --------
- >>> from netneurotools import utils
- >>> p = utils.run('echo "hello world"', return_proc=True, quiet=True)
- >>> p.returncode
- 0
- >>> p.stdout # doctest: +SKIP
- 'hello world\\n'
- """ # noqa: D301
- merged_env = os.environ.copy()
- if env is not None:
- if not isinstance(env, dict):
- raise TypeError('Provided `env` must be a dictionary, not {}'
- .format(type(env)))
- merged_env.update(env)
-
- opts = {}
- if quiet:
- opts = dict(stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
- proc = subprocess.run(cmd, env=merged_env, shell=True, check=True,
- universal_newlines=True, **opts)
-
- if return_proc:
- return proc
-
-
-def check_fs_subjid(subject_id, subjects_dir=None):
- """
- Check that `subject_id` exists in provided FreeSurfer `subjects_dir`.
-
- Parameters
- ----------
- subject_id : str
- FreeSurfer subject ID
- subjects_dir : str, optional
- Path to FreeSurfer subject directory. If not set, will inherit from
- the environmental variable $SUBJECTS_DIR. Default: None
-
- Returns
- -------
- subject_id : str
- FreeSurfer subject ID, as provided
- subjects_dir : str
- Full filepath to `subjects_dir`
-
- Raises
- ------
- FileNotFoundError
- """
- # check inputs for subjects_dir and subject_id
- if subjects_dir is None or not os.path.isdir(subjects_dir):
- try:
- subjects_dir = os.environ['SUBJECTS_DIR']
- except KeyError:
- subjects_dir = os.getcwd()
- else:
- subjects_dir = os.path.abspath(subjects_dir)
-
- subjdir = os.path.join(subjects_dir, subject_id)
- if not os.path.isdir(subjdir):
- raise FileNotFoundError('Cannot find specified subject id {} in '
- 'provided subject directory {}.'
- .format(subject_id, subjects_dir))
-
- return subject_id, subjects_dir
-
-
-def get_centroids(img, labels=None, image_space=False):
- """
- Find centroids of `labels` in `img`.
-
- Parameters
- ----------
- img : niimg-like object
- 3D image containing integer label at each point
- labels : array_like, optional
- List of labels for which to find centroids. If not specified all
- labels present in `img` will be used. Zero will be ignored as it is
- considered "background." Default: None
- image_space : bool, optional
- Whether to return xyz (image space) coordinates for centroids based
- on transformation in `img.affine`. Default: False
-
- Returns
- -------
- centroids : (N, 3) np.ndarray
- Coordinates of centroids for ROIs in input data
- """
- from nilearn._utils import check_niimg_3d
-
- img = check_niimg_3d(img)
- data = np.asarray(img.dataobj)
-
- if labels is None:
- labels = np.trim_zeros(np.unique(data))
-
- centroids = np.vstack(ndimage.center_of_mass(data, labels=data,
- index=labels))
-
- if image_space:
- centroids = nib.affines.apply_affine(img.affine, centroids)
-
- return centroids
diff --git a/pyproject.toml b/pyproject.toml
index a1ab0c5..ad05ca1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,10 +37,15 @@ doc = [
"sphinx_rtd_theme",
"sphinx-gallery"
]
-plotting = [
+pysurfer = [
+ "vtk",
"mayavi",
"pysurfer"
]
+pyvista = [
+ "vtk",
+ "pyvista"
+]
numba = [
"numba"
]
@@ -61,12 +66,18 @@ test = [
requires = ["setuptools", "versioneer[toml]"]
build-backend = "setuptools.build_meta"
+[tool.setuptools]
+include-package-data = true
+
[tool.setuptools.packages.find]
include = [
"netneurotools",
"netneurotools.*"
]
+[tool.setuptools.package-data]
+"*" = ["*.json", "*.bib"]
+
[tool.setuptools.dynamic]
version = {attr = "netneurotools.__version__"}
@@ -79,12 +90,7 @@ tag_prefix = ""
parentdir_prefix = ""
[tool.ruff]
-select = ["E", "F", "B", "W", "D", "NPY"]
-ignore = [
- "B905", # zip() without an explicit strict= parameter
- # "W605", # Invalid escape sequence: latex
- "NPY002", # Replace legacy `np.random` call with `np.random.Generator`
-]
+
line-length = 88
exclude = [
"setup.py",
@@ -95,18 +101,26 @@ exclude = [
]
target-version = "py38"
-[tool.ruff.pydocstyle]
+[tool.ruff.lint]
+select = ["E", "F", "B", "W", "D", "NPY"]
+ignore = [
+ "B905", # zip() without an explicit strict= parameter
+ # "W605", # Invalid escape sequence: latex
+ "NPY002", # Replace legacy `np.random` call with `np.random.Generator`
+]
+preview = true
+
+[tool.ruff.lint.pydocstyle]
convention = "numpy"
-[tool.ruff.per-file-ignores]
+[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["D104", "F401"]
-"netneurotools/tests/*" = ["B011", "D103"]
+"test_*" = ["B011"]
"examples/*" = ["E402", "D"]
[tool.coverage.run]
source = ["netneurotools"]
omit = [
- "netneurotools/tests/*",
"netneurotools/_version.py",
]
diff --git a/resources/generate_atl-cammoun2012_surface.py b/resources/generate_atl-cammoun2012_surface.py
index 686b518..6edc121 100755
--- a/resources/generate_atl-cammoun2012_surface.py
+++ b/resources/generate_atl-cammoun2012_surface.py
@@ -116,7 +116,7 @@ def combine_cammoun_500(lhannot, rhannot, subject_id, annot=None,
quiet=quiet)
# save ctab information from annotation file
- vtx, ct, names = nib.freesurfer.read_annot(fn)
+ _, ct, names = nib.freesurfer.read_annot(fn)
data = np.column_stack([[f.decode() for f in names], ct[:, :-1]])
ctab = ctab.append(pd.DataFrame(data), ignore_index=True)