From b874deaacf01c91ad54d8115b5e02ba5475ad88c Mon Sep 17 00:00:00 2001
From: Olivier Winter <olivier.winter@hotmail.fr>
Date: Tue, 23 Jan 2024 17:57:43 +0000
Subject: [PATCH] Cellatlas (#5)

* refactor get cache function to return higher level instead of `flatmaps` folder

* wip plenty of stuff

* make sure slice dimension are consistent regardless of the volume memory layout

* add the agea atlas backend with geometry

* bugfixes

* windows CI tests require memmap to be closed to remove tempdir

* review comments modifications
---
 .flake8                                       |  2 +-
 CHANGELOG.md                                  | 10 ++
 README.md                                     | 22 +++--
 ...nomics_load_allen_gene_expression_atlas.py | 14 +++
 iblatlas/__init__.py                          | 28 +++---
 iblatlas/atlas.py                             | 99 ++++++++++---------
 iblatlas/flatmaps.py                          | 18 ++--
 iblatlas/genes.py                             | 38 -------
 iblatlas/genomics/__init__.py                 |  0
 iblatlas/genomics/agea.py                     | 59 +++++++++++
 .../01-make_experiments_parquet.py            | 72 ++++++++++++++
 .../02-scrape-experiments.py                  | 49 +++++++++
 .../03-scrape-correlations.py                 |  9 ++
 .../04-read-experiments.py                    | 65 ++++++++++++
 .../05-generate-atlas-templates.py            | 84 ++++++++++++++++
 iblatlas/plots.py                             |  2 +-
 iblatlas/tests/test_atlas.py                  | 40 +++++---
 iblatlas/tests/test_genomics.py               | 40 ++++++++
 pyproject.toml                                |  3 +-
 19 files changed, 523 insertions(+), 131 deletions(-)
 create mode 100644 examples/atlas_genomics_load_allen_gene_expression_atlas.py
 delete mode 100644 iblatlas/genes.py
 create mode 100644 iblatlas/genomics/__init__.py
 create mode 100644 iblatlas/genomics/agea.py
 create mode 100644 iblatlas/genomics/gene_expression_scrapping/01-make_experiments_parquet.py
 create mode 100644 iblatlas/genomics/gene_expression_scrapping/02-scrape-experiments.py
 create mode 100644 iblatlas/genomics/gene_expression_scrapping/03-scrape-correlations.py
 create mode 100644 iblatlas/genomics/gene_expression_scrapping/04-read-experiments.py
 create mode 100644 iblatlas/genomics/gene_expression_scrapping/05-generate-atlas-templates.py
 create mode 100644 iblatlas/tests/test_genomics.py

diff --git a/.flake8 b/.flake8
index 3f09d70..50d87ec 100644
--- a/.flake8
+++ b/.flake8
@@ -1,6 +1,6 @@
 [flake8]
 max-line-length = 130
-ignore = W504, W503, E266
+ignore = W504, W503, E266, D, BLK
 exclude =
     .git,
     __pycache__,
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ebd49a7..4661b00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,13 @@
+## [1.0.0]
+
+### Added
+- `iblatlas.genomics` module for working with genomics data from Allen contains
+  - the Allen gene expression atlas in the `iblatlas.genomics.agea` module
+  - the Allen cell types atlas in the `iblatlas.genomics.merfish` module
+### Modified
+- slices of the atlas are now always returned with consistent sizes regardless of the volume layout on disk
+- atlases now can have an extra dimension in the image volume, to allow for multiple layers
+
 ## [0.3.0]
 ### Modified
 - Insertion._get_surface_intersection: can return None if and when mode is not set to raise and insertion is not intersecting with any surface
diff --git a/README.md b/README.md
index 0cc59d5..fa5fd6c 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,24 @@
 # iblatlas
-This repository contains tools to manipulate 3D representations of the mouse brain anatomy for electrophysiolgy experiments.
-The tools are mainly using the the Allen Brain Atlas although some other atlases can be used and are written in Python.
+Tools to manipulate hierarchical 3D representations of the mouse brain anatomy for electrophysiolgy experiments.
+The tools are mainly using the the Allen CCF although some other atlases can be used.
 
-One of the requirment of this repository is to use minimal requirements, based of standard matplotlib, numpy and scipy libraries, and excludes visualisation tool.
+**This repository uses minimal requirements, based of standard `matplotlib`, `numpy` and `scipy` libraries, to the exclusion
+of more complex visualization tools such as `pyqt`.**
 
-The documentation can be found here: https://int-brain-lab.github.io/iblenv/notebooks_external/atlas_working_with_ibllib_atlas.html
+## Documentation
+[Full package documentation](https://int-brain-lab.github.io/iblenv/_autosummary/iblatlas.html#module-iblatlas)
+
+[Notebooks and examples](https://int-brain-lab.github.io/iblenv/notebooks_external/atlas_working_with_ibllib_atlas.html)
 
 ## Installation
 `pip install iblatlas`
 
-
 ## Contributing
 Changes are merged by pull requests.
 Release checklist:
-- [ ] Update version in `iblatlas/__init__.py`
-- [ ] Update `CHANGELOG.md`
-- [ ] Create a pull request to the `main` branch on GitHub
+- [x] Update version in `iblatlas/__init__.py`
+- [x] Update `CHANGELOG.md`
+- [x] Create a pull request to the `main` branch on GitHub
+- [x] Once the PR is merged, create a new tag and push the tag
 
-Once merged the package is uploaded to PyPI using GitHub Actions.
+Once a tag is pushed on main the package is uploaded to PyPI using GitHub Actions.
diff --git a/examples/atlas_genomics_load_allen_gene_expression_atlas.py b/examples/atlas_genomics_load_allen_gene_expression_atlas.py
new file mode 100644
index 0000000..fcff9a0
--- /dev/null
+++ b/examples/atlas_genomics_load_allen_gene_expression_atlas.py
@@ -0,0 +1,14 @@
+import matplotlib.pyplot as plt
+from iblatlas.genomics import agea
+
+df_genes, gene_expression_volumes, atlas_agea = agea.load()
+
+igenes = (0,)
+fig, axs = plt.subplots(3, 2, sharex=True, sharey=True)
+atlas_agea.plot_cslice(0, ax=axs[0, 0])
+atlas_agea.plot_cslice(0, ax=axs[1, 0], volume='annotation')
+atlas_agea.plot_cslice(0, ax=axs[2, 0], volume=gene_expression_volumes[igenes[0]], cmap='viridis')
+atlas_agea.plot_sslice(0, ax=axs[0, 1])
+atlas_agea.plot_sslice(0, ax=axs[1, 1], volume='annotation')
+atlas_agea.plot_sslice(0, ax=axs[2, 1], volume=gene_expression_volumes[igenes[0]], cmap='viridis')
+fig.tight_layout()
diff --git a/iblatlas/__init__.py b/iblatlas/__init__.py
index b522ae2..098b670 100644
--- a/iblatlas/__init__.py
+++ b/iblatlas/__init__.py
@@ -3,8 +3,7 @@
 For examples and tutorials on using the IBL atlas package, see
 https://docs.internationalbrainlab.org/atlas_examples.html
 
-.. TODO Explain differences between this package and the Allen SDK.
-Much of this was adapted from the `cortexlab allenCCF repository <https://github.com/cortex-lab/allenCCF>`_.
+
 
 Terminology
 -----------
@@ -61,11 +60,15 @@
 bregma and lambda are at the same DV height) [6]_, however this is not currently accounted for.
 
 
-Mappings
---------
-In addition to the atlases there are also multiple brain region mappings that serve one of two purposes: 1. control the
-granularity particular brain regions; 2. support differing anatomical sub-devisions or nomenclature.  The two Allen atlas mappings
-below were created somewhat arbirarily by Nick Steinmetz to aid in analysis:
+Brain Regions Mappings
+----------------------
+The Allen CCF ontology regroups brain regions in a hierarchical manner, with up to 9 levels of parcellation. The
+labels volumes provided by the Allen Institute are at the finest level of parcellation, ie. leaf nodes.
+However it is often useful to regroup regions at higher levels of the hierarchy for aggregation purposes. The mappings
+represent the correspondence between the leaf nodes and their grouping for given analysis purposes. This is useful for:
+ 1. control the granularity particular brain regions;
+ 2. support differing anatomical sub-devisions or nomenclature.
+The two Allen atlas mappings below were created by Nick Steinmetz to aid in neural analysis:
 
 1. Beryl - brain atlas annotations without layer sub-divisions or certain ganglial/nucleus sub-devisisions (e.g. the core/shell
    sub-division of the lateral geniculate nucleus). Fibre tracts, pia, etc. are also absent.  The choice of which areas to combine
@@ -90,7 +93,7 @@
 Notes
 -----
 The Allen atlas and the CCF annotations have different release dates and versions [10]_. The annotations used by IBL are the 2017
-version.
+version (CCFv2).
 
 The IBL uses the following conventions:
 
@@ -107,7 +110,7 @@
 
 Examples
 --------
-Below are some breif API examples. For in depth tutorials on using the IBL atlas package, see
+Below are some brief API examples. For in depth tutorials on using the IBL atlas package, see
 https://docs.internationalbrainlab.org/atlas_examples.html.
 
 Find bregma position in indices * resolution in um
@@ -138,7 +141,7 @@
   `Mappings`_.
 * **swanson_regions.npy** - A 1D array of length 323 containing the Allen CCF brain region IDs
 * **mappings.pqt** - A table of mappings.  Each column defines a mapping, with the '-lr' suffix indicating a lateralized version.
-  The rows contain the correspondence of each mapping to the int64 index of the lateralized Allen structure tree.  The table is
+  The rows contain the correspondence of each mapping to the int64 index of the lateralized Allen structure tree. The table is
   generated by iblatlas.regions.BrainRegions._compute_mappings.
 
 Remote files
@@ -159,8 +162,6 @@
   region were then simplified using the `Ramer Douglas Peucker algorithm <https://rdp.readthedocs.io/en/latest/>`_
 * **swanson2allen.npz** - TODO Document who made this, its contents, purpose and data type
 * **<flatmap_name>_<res_um>.nrrd** - TODO Document who made this, its contents, purpose and data type
-* **gene-expression.pqt** - TODO Document who made this, its contents, purpose and data type
-* **gene-expression.bin** - TODO Document who made this, its contents, purpose and data type.
 
 .. [*] The annotation and average template volumes were created from the images provided in Supplemtary Data 4 of Chon et al. [3]_
    and stitched together as a single volume using SimpleITK.
@@ -192,6 +193,5 @@
    ontology and flatmaps. J Comp Neurol. [doi 10.1002/cne.24381]
 .. [10] Allen Mouse Common Coordinate Framework Technical White Paper (October 2017 v3)
    http://help.brain-map.org/download/attachments/8323525/Mouse_Common_Coordinate_Framework.pdf
-
 """
-__version__ = '0.3.0'
\ No newline at end of file
+__version__ = '0.4.0'
diff --git a/iblatlas/atlas.py b/iblatlas/atlas.py
index 2828c31..b2f1a93 100644
--- a/iblatlas/atlas.py
+++ b/iblatlas/atlas.py
@@ -148,19 +148,6 @@ def nxyz(self):
         """numpy.array: Coordinates of the element volume[0, 0, 0] in the coordinate space."""
         return np.array([self.nx, self.ny, self.nz])
 
-    """Methods ratios to indices"""
-    def r2ix(self, r):
-        # FIXME Document
-        return int((self.nx - 1) * r)
-
-    def r2iy(self, r):
-        # FIXME Document
-        return int((self.nz - 1) * r)
-
-    def r2iz(self, r):
-        # FIXME Document
-        return int((self.nz - 1) * r)
-
     """Methods distance to indices"""
     @staticmethod
     def _round(i, round=True):
@@ -459,10 +446,13 @@ class BrainAtlas:
     yet this class can be used for other atlases arises.
     """
 
-    """numpy.array: An image volume."""
+    """numpy.array: A 3D image volume."""
     image = None
-    """numpy.array: An annotation label volume."""
+    """numpy.array: A 3D annotation label volume."""
     label = None
+    """numpy.array: One or several optional data volumes, the 3 last dimensions should match
+    the image and the label volumes dimensions"""
+    volumes = None
 
     def __init__(self, image, label, dxyz, regions, iorigin=[0, 0, 0],
                  dims2xyz=[0, 1, 2], xyz2dims=[0, 1, 2]):
@@ -478,8 +468,8 @@ def __init__(self, image, label, dxyz, regions, iorigin=[0, 0, 0],
         self.image = image
         self.label = label
         self.regions = regions
-        self.dims2xyz = dims2xyz
-        self.xyz2dims = xyz2dims
+        self.dims2xyz = np.array(dims2xyz)
+        self.xyz2dims = np.array(xyz2dims)
         assert np.all(self.dims2xyz[self.xyz2dims] == np.array([0, 1, 2]))
         assert np.all(self.xyz2dims[self.dims2xyz] == np.array([0, 1, 2]))
         # create the coordinate transform object that maps volume indices to real world coordinates
@@ -492,8 +482,13 @@ def __init__(self, image, label, dxyz, regions, iorigin=[0, 0, 0],
 
     @staticmethod
     def _get_cache_dir():
+        """
+        ./histology/ATLAS/Needles/Allen
+        Where . is the main ONE cache directory
+        :return: pathlib.Path
+        """
         par = one.params.get(silent=True)
-        path_atlas = Path(par.CACHE_DIR).joinpath('histology', 'ATLAS', 'Needles', 'Allen', 'flatmaps')
+        path_atlas = Path(par.CACHE_DIR).joinpath('histology', 'ATLAS', 'Needles', 'Allen')
         return path_atlas
 
     def mask(self):
@@ -729,7 +724,7 @@ def _plot_slice(im, extent, ax=None, cmap=None, volume=None, **kwargs):
         cmap : str, matplotlib.colors.Colormap
             The Colormap instance or registered colormap name used to map scalar data to colors.
             Defaults to 'bone'.
-        volume : str
+        volume : str | np.array
             If 'boundary', assumes image is an outline of boundaries between all regions.
             FIXME How does this affect the plot?
         **kwargs
@@ -746,10 +741,11 @@ def _plot_slice(im, extent, ax=None, cmap=None, volume=None, **kwargs):
         if not cmap:
             cmap = plt.get_cmap('bone')
 
-        if volume == 'boundary':
-            imb = np.zeros((*im.shape[:2], 4), dtype=np.uint8)
-            imb[im == 1] = np.array([0, 0, 0, 255])
-            im = imb
+        if isinstance(volume, str):
+            if volume == 'boundary':
+                imb = np.zeros((*im.shape[:2], 4), dtype=np.uint8)
+                imb[im == 1] = np.array([0, 0, 0, 255])
+                im = imb
 
         ax.imshow(im, extent=extent, cmap=cmap, **kwargs)
         return ax
@@ -795,7 +791,10 @@ def slice(self, coordinate, axis, volume='image', mode='raise', region_values=No
             - if volume='volume', region_values must have shape ba.image.shape
             - if volume='value', region_values must have shape ba.regions.id
         :param mapping: mapping to use. Options can be found using ba.regions.mappings.keys()
-        :return: 2d array or 3d RGB numpy int8 array
+        :return: 2d array or 3d RGB numpy int8 array of dimensions:
+            - 0: nap x ndv (sagittal slice)
+            - 1: nml x ndv (coronal slice)
+            - 2: nap x nml (horizontal slice)
         """
         if axis == 0:
             index = self.bc.x2i(np.array(coordinate), mode=mode)
@@ -804,43 +803,53 @@ def slice(self, coordinate, axis, volume='image', mode='raise', region_values=No
         elif axis == 2:
             index = self.bc.z2i(np.array(coordinate), mode=mode)
 
-        # np.take is 50 thousand times slower than straight slicing !
         def _take(vol, ind, axis):
+            """
+            This is a 2 steps process to get the slice along the correct axis
+            1) slice the volume according to the mapped axis corresponding to the sclice
+            we do this because np.take is 50 thousand times slower than straight slicing !
+            2) reshape the output array according to the slice specifications
+            """
+            volume_axis = self.xyz2dims[axis]
             if mode == 'clip':
-                ind = np.minimum(np.maximum(ind, 0), vol.shape[axis] - 1)
-            if axis == 0:
-                return vol[ind, :, :]
-            elif axis == 1:
-                return vol[:, ind, :]
-            elif axis == 2:
-                return vol[:, :, ind]
+                ind = np.minimum(np.maximum(ind, 0), vol.shape[volume_axis] - 1)
+            if volume_axis == 0:
+                slic = vol[ind, :, :]
+            elif volume_axis == 1:
+                slic = vol[:, ind, :]
+            elif volume_axis == 2:
+                slic = vol[:, :, ind]
+            output_sizes = [[1, 2], [0, 2], [1, 0]]  # we expect those sizes where index is the axis
+            if np.diff(self.xyz2dims[output_sizes[axis]])[0] < 0:
+                slic = slic.transpose().copy()
+            return slic
 
         def _take_remap(vol, ind, axis, mapping):
             # For the labels, remap the regions indices according to the mapping
             return self._get_mapping(mapping=mapping)[_take(vol, ind, axis)]
 
         if isinstance(volume, np.ndarray):
-            return _take(volume, index, axis=self.xyz2dims[axis])
+            return _take(volume, index, axis=axis)
         elif volume in 'annotation':
-            iregion = _take_remap(self.label, index, self.xyz2dims[axis], mapping)
+            iregion = _take_remap(self.label, index, axis=axis, mapping=mapping)
             return self._label2rgb(iregion)
         elif volume == 'image':
-            return _take(self.image, index, axis=self.xyz2dims[axis])
+            return _take(self.image, index, axis=axis)
         elif volume == 'value':
-            return region_values[_take_remap(self.label, index, self.xyz2dims[axis], mapping)]
+            return region_values[_take_remap(self.label, index, axis, mapping)]
         elif volume == 'image':
-            return _take(self.image, index, axis=self.xyz2dims[axis])
+            return _take(self.image, index, axis=axis)
         elif volume in ['surface', 'edges']:
             self.compute_surface()
-            return _take(self.surface, index, axis=self.xyz2dims[axis])
+            return _take(self.surface, index, axis=axis)
         elif volume == 'boundary':
-            iregion = _take_remap(self.label, index, self.xyz2dims[axis], mapping)
+            iregion = _take_remap(self.label, index, axis, mapping)
             return self.compute_boundaries(iregion)
 
         elif volume == 'volume':
             if bc is not None:
                 index = bc.xyz2i(np.array([coordinate] * 3))[axis]
-            return _take(region_values, index, axis=self.xyz2dims[axis])
+            return _take(region_values, index, axis=axis)
 
     def compute_boundaries(self, values):
         """
@@ -896,7 +905,6 @@ def plot_cslice(self, ap_coordinate, volume='image', mapping=None, region_values
         :param **kwargs: matplotlib.pyplot.imshow kwarg arguments
         :return: matplotlib ax object
         """
-
         cslice = self.slice(ap_coordinate, axis=1, volume=volume, mapping=mapping, region_values=region_values)
         return self._plot_slice(np.moveaxis(cslice, 0, 1), extent=self.extent(axis=1), volume=volume, **kwargs)
 
@@ -964,14 +972,11 @@ def plot_top(self, volume='annotation', mapping=None, region_values=None, ax=Non
         :param kwargs:
         :return:
         """
-
         self.compute_surface()
         ix, iy = np.meshgrid(np.arange(self.bc.nx), np.arange(self.bc.ny))
         iz = self.bc.z2i(self.top)
         inds = self._lookup_inds(np.stack((ix, iy, iz), axis=-1))
-
         regions = self._get_mapping(mapping=mapping)[self.label.flat[inds]]
-
         if volume == 'annotation':
             im = self._label2rgb(regions)
         elif volume == 'image':
@@ -1290,8 +1295,10 @@ class AllenAtlas(BrainAtlas):
 
     """numpy.array: A diffusion weighted imaging (DWI) image volume.
 
-    The Allen atlas DWI average template volume has with the shape (ap, ml, dv) and contains uint16
-    values.  FIXME What do the values represent?
+    This average template brain was created from images of 1,675 young adult C57BL/6J mouse brains
+    acquired using serial two-photon tomography.
+    This volume has a C-ordered shape of (ap, ml, dv) and contains uint16
+    values.
     """
     image = None
 
diff --git a/iblatlas/flatmaps.py b/iblatlas/flatmaps.py
index a5838ec..22e63b0 100644
--- a/iblatlas/flatmaps.py
+++ b/iblatlas/flatmaps.py
@@ -19,10 +19,8 @@
 
 
 class FlatMap(AllenAtlas):
-    """The Allen Atlas flatmap.
+    """
 
-    FIXME Document! How are these flatmaps determined? Are they related to the Swansan atlas or is
-     that something else?
     """
 
     def __init__(self, flatmap='dorsal_cortex', res_um=25):
@@ -46,7 +44,7 @@ def __init__(self, flatmap='dorsal_cortex', res_um=25):
 
     def _get_flatmap_from_file(self):
         # gets the file in the ONE cache for the flatmap name in the property, downloads it if needed
-        file_flatmap = self._get_cache_dir().joinpath(f'{self.name}_{self.res_um}.nrrd')
+        file_flatmap = self._get_cache_dir().joinpath('flatmaps', f'{self.name}_{self.res_um}.nrrd')
         if not file_flatmap.exists():
             file_flatmap.parent.mkdir(exist_ok=True, parents=True)
             aws.s3_download_file(f'atlas/{file_flatmap.name}', file_flatmap)
@@ -214,8 +212,12 @@ def circles(N=5, atlas=None, display='flat'):
 
 def swanson(filename="swanson2allen.npz"):
     """
-    FIXME Document! Which publication to reference? Are these specifically for flat maps?
-     Shouldn't this be made into an Atlas class with a mapping or scaling applied?
+    A rasterized rendition of the Swanson projection of the mouse brain, which is a 2D
+    representation of the mouse brain.
+    Each pixel in the image corresponds to a region index in the Allen CCFv2 annotation volume.
+    [1] J. D. Hahn et al., “An open access mouse brain flatmap and upgraded rat and human
+    brain flatmaps based on current reference atlases,”
+    J Comp Neurol, vol. 529, no. 3, pp. 576–594, Feb. 2021, doi: 10.1002/cne.24966.
 
     Parameters
     ----------
@@ -231,7 +233,7 @@ def swanson(filename="swanson2allen.npz"):
         'bb0554ecc704dd4b540151ab57f73822',  # version 2022-05-02 (remapped)
         '7722c1307cf9a6f291ad7632e5dcc88b',  # version 2022-05-09 (removed wolf pixels and 2 artefact regions)
     ]
-    npz_file = AllenAtlas._get_cache_dir().joinpath(filename)
+    npz_file = AllenAtlas._get_cache_dir().joinpath('flatmaps', filename)
     if not npz_file.exists() or md5(npz_file) in OLD_MD5:
         npz_file.parent.mkdir(exist_ok=True, parents=True)
         _logger.info(f'downloading swanson image from {aws.S3_BUCKET_IBL} s3 bucket...')
@@ -260,7 +262,7 @@ def swanson_json(filename="swansonpaths.json", remap=True):
                # and CUL4 split (on s3 called swansonpaths_56daa.json)
                'f848783954883c606ca390ceda9e37d2']
 
-    json_file = AllenAtlas._get_cache_dir().joinpath(filename)
+    json_file = AllenAtlas._get_cache_dir().joinpath('flatmaps', filename)
     if not json_file.exists() or md5(json_file) in OLD_MD5:
         json_file.parent.mkdir(exist_ok=True, parents=True)
         _logger.info(f'downloading swanson paths from {aws.S3_BUCKET_IBL} s3 bucket...')
diff --git a/iblatlas/genes.py b/iblatlas/genes.py
deleted file mode 100644
index 34deadc..0000000
--- a/iblatlas/genes.py
+++ /dev/null
@@ -1,38 +0,0 @@
-"""Gene expression maps."""
-import logging
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-
-from iblutil.io.hashfile import md5
-import one.remote.aws as aws
-
-from iblatlas.atlas import AllenAtlas
-
-_logger = logging.getLogger(__name__)
-
-
-def allen_gene_expression(filename='gene-expression.pqt', folder_cache=None):
-    """
-    Reads in the Allen gene expression experiments binary data.
-    :param filename:
-    :param folder_cache:
-    :return: a dataframe of experiments, where each record corresponds to a single gene expression
-    and a memmap of all experiments volumes, size (4345, 58, 41, 67) corresponding to
-    (nexperiments, ml, dv, ap). The spacing between slices is 200 um
-    """
-    OLD_MD5 = []
-    DIM_EXP = (4345, 58, 41, 67)
-    folder_cache = folder_cache or AllenAtlas._get_cache_dir().joinpath(filename)
-    file_parquet = Path(folder_cache).joinpath('gene-expression.pqt')
-    file_bin = file_parquet.with_suffix(".bin")
-
-    if not file_parquet.exists() or md5(file_parquet) in OLD_MD5:
-        file_parquet.parent.mkdir(exist_ok=True, parents=True)
-        _logger.info(f'downloading gene expression data from {aws.S3_BUCKET_IBL} s3 bucket...')
-        aws.s3_download_file(f'atlas/{file_parquet.name}', file_parquet)
-        aws.s3_download_file(f'atlas/{file_bin.name}', file_bin)
-    df_genes = pd.read_parquet(file_parquet)
-    gexp_all = np.memmap(file_bin, dtype=np.float16, mode='r', offset=0, shape=DIM_EXP)
-    return df_genes, gexp_all
diff --git a/iblatlas/genomics/__init__.py b/iblatlas/genomics/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/iblatlas/genomics/agea.py b/iblatlas/genomics/agea.py
new file mode 100644
index 0000000..8788a88
--- /dev/null
+++ b/iblatlas/genomics/agea.py
@@ -0,0 +1,59 @@
+"""
+[1] E. S. Lein et al., “Genome-wide atlas of gene expression in the adult mouse brain,”
+ Nature, vol. 445, no. 7124, Art. no. 7124, Jan. 2007, doi: 10.1038/nature05453.
+[2] L. Ng et al., “An anatomic gene expression atlas of the adult mouse brain,”
+ Nat Neurosci, vol. 12, no. 3, Art. no. 3, Mar. 2009, doi: 10.1038/nn.2281.
+"""
+import logging
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+
+import one.remote.aws as aws
+
+from iblatlas import atlas
+
+_logger = logging.getLogger(__name__)
+
+_, NML, NDV, NAP = DIM_EXP = (4345, 58, 41, 67)  # nexperiments, nml, ndv, nap
+
+
+def load(folder_cache=None, expression_size=DIM_EXP):
+    """
+    Reads in the Allen gene expression experiments binary data.
+    Generation scripts from the Allen Institute are available in the gene-expression-scrapping folder
+    :param filename:
+    :param folder_cache:
+    :return:
+        a dataframe of experiments (4345, 2), where each record corresponds to a single gene expression
+        a memmap of all experiments brain volumes, size (4345, 58, 41, 67) corresponding to
+    (nexperiments, ml, dv, ap). The spacing between slices is 200 um
+        a brainatlas object with the labels and coordinates matching the gene expression volumes
+    """
+    OLD_VERSIONS = ['2023-06-12']
+    folder_cache = Path(folder_cache or atlas.AllenAtlas._get_cache_dir().joinpath('agea'))
+    # check the AWS version and download the files if needed
+    version_flag = next(folder_cache.glob('*.version'), None)
+    if version_flag is None or version_flag.stem in OLD_VERSIONS:
+        _logger.info(f'downloading gene expression data from {aws.S3_BUCKET_IBL} s3 bucket...')
+        aws.s3_download_folder('atlas/agea', folder_cache)
+
+    # load the genes dataframe and the gene expression volumes
+    file_parquet = Path(folder_cache).joinpath('gene-expression.pqt')
+    file_expression = Path(folder_cache).joinpath('gene-expression.bin')
+    df_genes = pd.read_parquet(file_parquet)
+    expression_volumes = np.memmap(file_expression, dtype=np.float16, mode='r', offset=0, shape=expression_size)
+
+    # create a brain atlas object with the gene expression volume geometry and pre-computed labels
+    atlas_agea = atlas.BrainAtlas(
+        image=np.load(Path(folder_cache).joinpath('image.npy')),
+        label=np.load(Path(folder_cache).joinpath('label.npy')),
+        dxyz=200 / 1e6 * np.array([1, -1, -1]),
+        regions=atlas.BrainRegions(),
+        iorigin=atlas.ALLEN_CCF_LANDMARKS_MLAPDV_UM['bregma'] / 200 + np.array([0, 0, 0]),
+        dims2xyz=[0, 2, 1],
+        xyz2dims=[0, 2, 1]
+    )
+
+    return df_genes, expression_volumes, atlas_agea
diff --git a/iblatlas/genomics/gene_expression_scrapping/01-make_experiments_parquet.py b/iblatlas/genomics/gene_expression_scrapping/01-make_experiments_parquet.py
new file mode 100644
index 0000000..2bd7c2e
--- /dev/null
+++ b/iblatlas/genomics/gene_expression_scrapping/01-make_experiments_parquet.py
@@ -0,0 +1,72 @@
+"""
+Module to download and format gene expression atlas data from Allen Institute AGEA project
+https://mouse.brain-map.org/agea
+https://www.nature.com/articles/nature05453
+http://help.brain-map.org/display/mousebrain/Documentation
+
+volumes for all genes (size 51533 voxels x 4376 genes) ~ .84 Gb float32
+correlation volumes (size 51533 voxels x 51533 genes) ~ 9.89 Gb float32
+
+They are 200 micron voxel
+
+ElementSpacing = 200 200 200
+DimSize = 67 41 58 (159_326 voxels)
+
+Which means you need to query:
+X = 0 to (66*200)
+Y = 0 to (40*200)
+Z = 0 to (57*200)
+"""
+# %%
+from pathlib import Path
+import numpy as np
+import pandas as pd
+
+
+folder_download = Path("/datadisk/gdrive/2022/08_gene_expression_atlas")
+file_parquet = folder_download.joinpath('experiments.pqt')
+
+
+def make_experiments_parquet(file_parquet):
+    """
+    Query experiments and gene acronyms for the xml API and write dataframe
+    with ids and corresponding acronyms
+    :param file_parquet:
+    :return:
+    """
+    import requests
+    from xml.etree import ElementTree
+    from copy import copy
+
+    http_query = ("http://api.brain-map.org/api/v2/data/query.xml?criteria=model::SectionDataSet,rma::"
+                  "criteria,[failed$eqfalse],products[id$eq1],plane_of_section[name$eqcoronal],"
+                  "treatments[name$eqISH],rma::include,genes,rma::options[num_rows$eqall]")
+    response = requests.get(http_query)
+    tree = ElementTree.fromstring(response.text)
+
+    def dictify(r, root=True, tags=None):
+        if root:
+            return {r.tag: dictify(r, False)}
+        d = copy(r.attrib)
+        if r.text and r.text.strip() != '':
+            d = r.text.strip()
+        for x in r.findall("./*"):
+            if x.tag not in d:
+                d[x.tag] = []
+            d[x.tag].append(dictify(x, False))
+        return d
+
+    datasets = dictify(tree)['Response']['section-data-sets'][0]['section-data-set']
+    ids = np.zeros(len(datasets), dtype=np.int64)
+    genes = []
+    for i, ds in enumerate(datasets):
+        ids[i] = ds['id'][0]
+        genes.append(ds['genes'][0]['gene'][0]['acronym'][0])
+
+    df_genes = pd.DataFrame(data=np.c_[ids, np.array(genes)], columns=['id', 'gene'])
+    df_genes.to_parquet(file_parquet)
+
+
+if not file_parquet.exists():
+    make_experiments_parquet(file_parquet)
+df_experiments = pd.read_parquet(file_parquet)
diff --git a/iblatlas/genomics/gene_expression_scrapping/02-scrape-experiments.py b/iblatlas/genomics/gene_expression_scrapping/02-scrape-experiments.py
new file mode 100644
index 0000000..51ec768
--- /dev/null
+++ b/iblatlas/genomics/gene_expression_scrapping/02-scrape-experiments.py
@@ -0,0 +1,49 @@
+from pathlib import Path
+import subprocess
+import shutil
+
+import numpy as np
+import pandas as pd
+from zipfile import ZipFile
+import skimage.io as io
+
+folder_download = Path("/datadisk/gdrive/2022/08_gene_expression_atlas")
+file_parquet = folder_download.joinpath('gene-expression.pqt')
+folder_experiments = folder_download.joinpath('experiments')
+experiments = pd.read_parquet(file_parquet)
+
+
+# %% scrape all zip files
+folder_experiments.mkdir(exist_ok=True)
+for i, exp in experiments.iterrows():
+    file_experiment = folder_experiments.joinpath(f"{exp.id}_{exp.gene}.zip")
+    if not file_experiment.exists():
+        print(i, file_experiment)
+        cmd = f"wget http://api.brain-map.org/grid_data/download/{exp.id} -P {folder_experiments}"
+        process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
+                                   stderr=subprocess.PIPE)
+        info, error = process.communicate()
+        shutil.move(folder_experiments.joinpath(exp.id), file_experiment)
+
+
+# %% Interpret
+file_out_npy = folder_download.joinpath("gene-expression.bin")
+scratch_dir = folder_download.joinpath('scratch')
+
+with open(file_out_npy, 'wb+') as fp:
+    for i, exp in experiments.iterrows():
+        print(i, file_experiment)
+        file_experiment = folder_experiments.joinpath(f"{exp.id}_{exp.gene}.zip")
+        scratch_dir.mkdir(exist_ok=True)
+        with ZipFile(file_experiment, 'r') as zip:
+            zip.extractall(scratch_dir)
+
+        #  (58, 41, 67) corresponds to ML, DV, AP
+        mhd_file = next(scratch_dir.glob('*.mhd'))
+        img = io.imread(mhd_file, plugin='simpleitk')
+
+        shutil.rmtree(scratch_dir)
+
+        img.astype(np.float16).tofile(fp)
+        # import matplotlib.pyplot as plt
+        # plt.imshow(np.squeeze(img[:, :, 25].T))
diff --git a/iblatlas/genomics/gene_expression_scrapping/03-scrape-correlations.py b/iblatlas/genomics/gene_expression_scrapping/03-scrape-correlations.py
new file mode 100644
index 0000000..9503d0d
--- /dev/null
+++ b/iblatlas/genomics/gene_expression_scrapping/03-scrape-correlations.py
@@ -0,0 +1,9 @@
+# download xross correlation values
+from pathlib import Path
+from one.webclient import http_download_file
+url = "https://mouse.brain-map.org/agea/data/P56/download?seed=6800,4200,5600&age=P56"
+zip_name = "6800_4200_5600.zip"
+folder_download = Path("/datadisk/gdrive/2022/08_gene_expression_atlas")
+
+fn = http_download_file(url, target_dir=folder_download)
+Path(fn).rename(zip_name)
diff --git a/iblatlas/genomics/gene_expression_scrapping/04-read-experiments.py b/iblatlas/genomics/gene_expression_scrapping/04-read-experiments.py
new file mode 100644
index 0000000..1a116a4
--- /dev/null
+++ b/iblatlas/genomics/gene_expression_scrapping/04-read-experiments.py
@@ -0,0 +1,65 @@
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from sklearn.neural_network import MLPClassifier
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import train_test_split
+
+from ibllib.atlas import AllenAtlas, BrainCoordinates
+
+DIM_EXP = (4345, 58, 41, 67)
+ne, nml, ndv, nap = DIM_EXP
+folder_download = Path("/datadisk/gdrive/2022/08_gene_expression_atlas")
+file_parquet = folder_download.joinpath('gene-expression.pqt')
+file_bin = file_parquet.with_suffix(".bin")
+
+folder_experiments = folder_download.joinpath('experiments')
+experiments = pd.read_parquet(file_parquet)
+
+ba = AllenAtlas()
+
+gexp_all = np.memmap(file_bin, dtype=np.float16, mode='r', offset=0, shape=DIM_EXP)
+
+# %% (58, 41, 67) corresponds to ML, DV, AP
+ie = np.where(experiments['gene'] == 'Pantr1')[0][0]
+
+gexp = gexp_all[ie, :, :, :].astype(np.float32)
+gexp[gexp <= 0] = np.nan
+bc = BrainCoordinates(nxyz=[nml, nap, ndv], dxyz=[200, 200, 200])
+
+xyz = np.array([0, 0, -0.002])
+ba.plot_slices(xyz, volume="annotation")
+
+iml, iap, idv = np.round(ba.xyz2ccf(xyz) / 200).astype(np.int32)
+
+# BrainAtlas(image=gexp, label, dxyz, regions, iorigin=[0, 0, 0],
+#              dims2xyz=[0, 1, 2], xyz2dims=[0, 1, 2]
+fig, axs = plt.subplots(2, 2)
+axs[0, 0].imshow(np.squeeze(gexp[:, :, iap]).T)
+axs[0, 1].imshow(np.squeeze(gexp[iml, :, :]))
+axs[1, 0].imshow(np.squeeze(gexp[:, idv, :]))
+
+
+# %% This cell takes
+ccf_coords = np.meshgrid(*[np.arange(DIM_EXP[i]) * 200 for i in [1, 2, 3]])  # ml, dv, ap
+xyzs = ba.ccf2xyz(np.c_[ccf_coords[0].flatten(), ccf_coords[2].flatten(), ccf_coords[1].flatten()])
+aids = ba.get_labels(xyzs, mode='clip', mapping='Cosmos')
+
+sel = ~np.logical_or(aids == 0, aids == 997)  # remove void and root voxels
+# reshape in a big array nexp x nvoxels
+gexps = gexp_all.reshape((ne, nml * nap * ndv))[:, sel].astype(np.float32).transpose()
+xyzs = xyzs[sel]
+aids = aids[sel]
+
+# %% ok ready for the learning part
+X_train, X_test, y_train, y_test = train_test_split(gexps, aids, stratify=aids)
+scaler = StandardScaler()
+scaler.fit(gexps)
+X_train = scaler.transform(X_train)
+X_test = scaler.transform(X_test)
+clf = MLPClassifier(random_state=1, max_iter=300, verbose=True).fit(X_train, y_train)
+clf.predict_proba(X_test[:1])
+clf.predict(X_test)
+clf.score(X_test, y_test)
diff --git a/iblatlas/genomics/gene_expression_scrapping/05-generate-atlas-templates.py b/iblatlas/genomics/gene_expression_scrapping/05-generate-atlas-templates.py
new file mode 100644
index 0000000..cc6b187
--- /dev/null
+++ b/iblatlas/genomics/gene_expression_scrapping/05-generate-atlas-templates.py
@@ -0,0 +1,84 @@
+"""
+When looking at one or a few genes it is possible to upsample the gene expression volume
+to the 25um or even the 10um resolution of the Allen CCF.
+However when looking at thousand of genes, it is more feasible to keep the 200um resolution
+Here we are downsampling a version of the CCF to 200um resolution that matches the gene expression atlas to
+use as a label volume and to compute the brain coordinates of the gene expression voxels
+"""
+# %%
+from pathlib import Path
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import scipy.stats
+
+from iblatlas import atlas
+
+DIM_EXP = (4345, 58, 41, 67)  # nexperiments, nml, ndv, nap
+#  path_agea_cache = atlas.AllenAtlas._get_cache_dir().joinpath('agea')
+path_agea_cache = Path('/Users/olivier/Downloads/ONE/alyx.internationalbrainlab.org/histology/ATLAS/Needles/Allen/agea')
+df_genes = pd.read_parquet(Path(path_agea_cache).joinpath('gene-expression.pqt'))
+gexp_all = np.memmap(Path(path_agea_cache).joinpath('gene-expression.bin'), dtype=np.float16, mode='r', offset=0, shape=DIM_EXP)
+
+# create a brain atlas object with the gene expression volume geometry
+bg = atlas.BrainAtlas(
+    image=gexp_all[0],
+    label=gexp_all[0] != -1,
+    dxyz=200 / 1e6 * np.array([1, -1, -1]),
+    regions=atlas.BrainRegions(),
+    iorigin=atlas.ALLEN_CCF_LANDMARKS_MLAPDV_UM['bregma'] / 200 + np.array([0, 0, 0]),
+    dims2xyz=[0, 2, 1],
+    xyz2dims=[0, 2, 1]
+)
+# instantiate the allen atlas at 25um resolution and compute the transform to the agea volume
+ba = atlas.AllenAtlas(res_um=25)
+xia2g = bg.bc.x2i(ba.bc.xscale)
+yia2g = bg.bc.y2i(ba.bc.yscale)
+zia2g = bg.bc.z2i(ba.bc.zscale)
+# meshgrid the coordinates and aggregate per agea voxel, mean for image, and mode for the label
+# (this takes less than a minute)
+Xia2g, Yia2g, Zia2g = [aa.flatten() for aa in np.meshgrid(xia2g, yia2g, zia2g)]
+df = pd.DataFrame(
+    np.c_[Xia2g, Yia2g, Zia2g, ba.image.flatten(), ba.label.flatten()],
+    columns=['ix', 'iy', 'iz', 'image', 'label'])
+gene_expression_labels = df.groupby(['ix', 'iy', 'iz']).agg(
+    image=pd.NamedAgg(column="image", aggfunc="mean"),
+    label=pd.NamedAgg(column="label", aggfunc=lambda x: scipy.stats.mode(x)[0][0]),
+).reset_index()
+
+# create the image and label volume for the agea brain atlas
+image = np.zeros(gexp_all[0].shape, dtype=np.float32)
+label = np.zeros(gexp_all[0].shape, dtype=np.int16)
+dim_columns = [['ix', 'iy', 'iz'][i] for i in bg.xyz2dims]
+linear_indices = np.ravel_multi_index([gene_expression_labels[c].values for c in dim_columns], image.shape)
+np.put(image, linear_indices, gene_expression_labels['image'].values)
+np.put(label, linear_indices, gene_expression_labels['label'].values)
+
+# make sure the transform went well by plotting a coronal and a sagittal slice from
+# the same coordinate in both atlases
+np.save(Path(path_agea_cache).joinpath('image.npy'), image)
+np.save(Path(path_agea_cache).joinpath('label.npy'), label)
+bg.image = image
+bg.label = label
+
+igenes = (0, 2325)
+fig, axs = plt.subplots(3, 2, sharex=True, sharey=True)
+ba.plot_cslice(0, ax=axs[0, 0])
+bg.plot_cslice(0, ax=axs[1, 0])
+ba.plot_cslice(0, ax=axs[0, 1], volume='annotation')
+bg.plot_cslice(0, ax=axs[1, 1], volume='annotation')
+bg.plot_cslice(0, ax=axs[2, 0], volume=gexp_all[igenes[0]], cmap='viridis')
+bg.plot_cslice(0, ax=axs[2, 1], volume=gexp_all[igenes[1]], cmap='viridis')
+fig.tight_layout()
+
+fig, axs = plt.subplots(3, 2, sharex=True, sharey=True)
+ba.plot_sslice(0, ax=axs[0, 0])
+bg.plot_sslice(0, ax=axs[1, 0])
+ba.plot_sslice(0, ax=axs[0, 1], volume='annotation')
+bg.plot_sslice(0, ax=axs[1, 1], volume='annotation')
+bg.plot_sslice(0, ax=axs[2, 0], volume=gexp_all[igenes[0]], cmap='viridis')
+bg.plot_sslice(0, ax=axs[2, 1], volume=gexp_all[igenes[1]], cmap='viridis')
+fig.tight_layout()
+
+# %%
diff --git a/iblatlas/plots.py b/iblatlas/plots.py
index 1e2e8d7..d7c6afc 100644
--- a/iblatlas/plots.py
+++ b/iblatlas/plots.py
@@ -231,7 +231,7 @@ def load_slice_files(slice, mapping):
         'top': []
     }
 
-    slice_file = AllenAtlas._get_cache_dir().parent.joinpath('svg', f'{slice}_{mapping}_paths.npy')
+    slice_file = AllenAtlas._get_cache_dir().joinpath('svg', f'{slice}_{mapping}_paths.npy')
     if not slice_file.exists() or md5(slice_file) in OLD_MD5[slice]:
         slice_file.parent.mkdir(exist_ok=True, parents=True)
         _logger.info(f'downloading swanson paths from {aws.S3_BUCKET_IBL} s3 bucket...')
diff --git a/iblatlas/tests/test_atlas.py b/iblatlas/tests/test_atlas.py
index d8a242d..49d9aba 100644
--- a/iblatlas/tests/test_atlas.py
+++ b/iblatlas/tests/test_atlas.py
@@ -3,7 +3,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 
-from iblatlas.atlas import (BrainCoordinates, cart2sph, sph2cart, Trajectory,
+from iblatlas.atlas import (BrainCoordinates, cart2sph, sph2cart, Trajectory, BrainAtlas,
                             Insertion, ALLEN_CCF_LANDMARKS_MLAPDV_UM, AllenAtlas)
 from iblatlas.regions import BrainRegions, FranklinPaxinosRegions
 from iblatlas.plots import prepare_lr_data, reorder_data
@@ -420,21 +420,35 @@ def test_plot_slices(self):
         axs = self.ba.plot_slices(np.array([0, -.0024, -.0038]))
         assert axs.shape == (2, 2)
 
-    def test_slice(self):
-        ba = self.ba
-        nx, ny, nz = ba.bc.nxyz
-        # tests output shapes
-        self.assertTrue(ba.slice(axis=0, coordinate=0).shape == (ny, nz))  # sagittal
-        self.assertTrue(ba.slice(axis=1, coordinate=0).shape == (nx, nz))  # coronal
-        self.assertTrue(ba.slice(axis=2, coordinate=-.002).shape == (ny, nx))  # horizontal
-        # tests out of bound
-        with self.assertRaises(ValueError):
-            ba.slice(axis=1, coordinate=123)
-        self.assertTrue(ba.slice(axis=1, coordinate=21, mode='clip').shape == (nx, nz))
+    def test_slice_axes(self):
+        nx, nz, ny = (58, 41, 67)
+        ba_gene_atlas_like = BrainAtlas(
+            image=np.random.randn(nx, nz, ny),
+            label=np.ones((nx, nz, ny)),
+            dxyz=200 / 1e6,
+            regions=BrainRegions(),
+            iorigin=[0, 0, 0],
+            dims2xyz=[0, 2, 1],
+            xyz2dims=[0, 2, 1]
+        )
+        for k, ba in {'allen': self.ba, 'gene_expression': ba_gene_atlas_like}.items():
+            with self.subTest(ba=k):
+                nx, ny, nz = ba.bc.nxyz
+                # tests output shapes
+                self.assertTrue(ba.slice(axis=0, coordinate=0).shape == (ny, nz))  # sagittal
+                self.assertTrue(ba.slice(axis=1, coordinate=0).shape == (nx, nz))  # coronal
+                self.assertTrue(ba.slice(axis=2, coordinate=.0002).shape == (ny, nx))  # horizontal
+                # tests out of bound
+                with self.assertRaises(ValueError):
+                    ba.slice(axis=1, coordinate=123)
+                self.assertTrue(ba.slice(axis=1, coordinate=21, mode='clip').shape == (nx, nz))  # another coronal
+
+    def test_slice_volumes(self):
         """
         here we test the different volumes and mappings
         """
-        # the label volume contains the region index (not id!),
+        ba = self.ba
+        nx, ny, nz = ba.bc.nxyz
         iregions = ba.slice(axis=0, coordinate=0, volume=ba.label)
         assert np.all(np.unique(iregions) == np.array([0, 1327]))
         rgb_slice = ba.slice(axis=0, coordinate=0, volume='annotation')
diff --git a/iblatlas/tests/test_genomics.py b/iblatlas/tests/test_genomics.py
new file mode 100644
index 0000000..75ff9e4
--- /dev/null
+++ b/iblatlas/tests/test_genomics.py
@@ -0,0 +1,40 @@
+import unittest
+import tempfile
+from pathlib import Path
+import unittest.mock
+
+import numpy as np
+import pandas as pd
+
+from iblatlas.genomics import agea
+
+
+class TestLoadAgea(unittest.TestCase):
+
+    def test_load(self):
+        """
+        Tests the mechanics of the load function by faking a 3 experiments gene expression dataset
+        Here we just want to make sure the function returns the right dataframes and volumes,
+        and that the atlas reads the right label and image volumes
+        """
+        with tempfile.TemporaryDirectory() as folder_cache:
+            df_genes = pd.DataFrame({
+                'id': np.array(['74658173', '71247618', '74511936']),
+                'gene': ['Ndufa10', 'Grik2', 'Dmp1']
+            })
+            df_genes.to_parquet(Path(folder_cache).joinpath('gene-expression.pqt'))
+            expression_matrices = np.random.rand(3, *agea.DIM_EXP[1:])
+            expression_matrices.tofile(Path(folder_cache).joinpath('gene-expression.bin'))
+            image = np.random.rand(*agea.DIM_EXP[1:])
+            np.save(Path(folder_cache).joinpath('image.npy'), image)
+            label = np.random.rand(*agea.DIM_EXP[1:])
+            np.save(Path(folder_cache).joinpath('label.npy'), label)
+            Path(folder_cache).joinpath('toto.version').touch()
+            df_genes, gene_expression_volumes, atlas_agea = agea.load(
+                folder_cache=folder_cache, expression_size=(3, 58, 41, 67))
+            self.assertEqual(df_genes.shape, (3, 2))
+            self.assertEqual(gene_expression_volumes.shape, (3, 58, 41, 67))
+            self.assertEqual(atlas_agea.image.shape, (58, 41, 67))
+            self.assertEqual(atlas_agea.label.shape, (58, 41, 67))
+            # on windows we need to close the memmap for the tempdir to be deleted
+            gene_expression_volumes._mmap.close()
diff --git a/pyproject.toml b/pyproject.toml
index 2a0f4fc..4718e52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -35,4 +35,5 @@ version = {attr = "iblatlas.__version__"}
 include-package-data = true
 
 [tool.setuptools.packages.find]
-exclude = ["iblatlas.tests*", "examples*"]  # exclude packages matching these glob patterns
+# exclude packages matching these glob patterns
+exclude = ["iblatlas.tests*", "examples*", "iblatlas.genomics.gene_expression_scrapping"]