Commit

Merge branch 'main' into merging_units
yger authored Jun 26, 2024
2 parents 4338fe3 + 99cc04e commit 3bf14bc
Showing 10 changed files with 222 additions and 79 deletions.
7 changes: 6 additions & 1 deletion .github/run_tests.sh
@@ -1,8 +1,13 @@
#!/bin/bash

MARKER=$1
NOVIRTUALENV=$2

# Activate the cached virtual environment unless the second argument is --no-virtual-env
if [ -z "$NOVIRTUALENV" ] || [ "$NOVIRTUALENV" != "--no-virtual-env" ]; then
source $GITHUB_WORKSPACE/test_env/bin/activate
fi

source $GITHUB_WORKSPACE/test_env/bin/activate
pytest -m "$MARKER" -vv -ra --durations=0 --durations-min=0.001 | tee report.txt; test ${PIPESTATUS[0]} -eq 0 || exit 1
echo "# Timing profile of ${MARKER}" >> $GITHUB_STEP_SUMMARY
python $GITHUB_WORKSPACE/.github/build_job_summary.py report.txt >> $GITHUB_STEP_SUMMARY
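With this change the script activates the cached virtual environment by default and skips activation when its second argument is --no-virtual-env, so the same script serves both the venv-based CI jobs and the all-tests.yml workflow below, which installs its packages directly on the runner. Note also that piping pytest through tee would normally mask its exit status; the test ${PIPESTATUS[0]} check preserves the real pytest result.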
129 changes: 129 additions & 0 deletions .github/workflows/all-tests.yml
@@ -0,0 +1,129 @@
name: Complete tests

on:
workflow_dispatch:
schedule:
- cron: "0 12 * * 0" # Weekly on Sunday at noon UTC
pull_request:
types: [synchronize, opened, reopened]
branches:
- main

env:
KACHERY_CLOUD_CLIENT_ID: ${{ secrets.KACHERY_CLOUD_CLIENT_ID }}
KACHERY_CLOUD_PRIVATE_KEY: ${{ secrets.KACHERY_CLOUD_PRIVATE_KEY }}

concurrency: # Cancel previous workflows on the same pull request
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
run:
name: ${{ matrix.os }} Python ${{ matrix.python-version }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.12"] # Lower and higher versions we support
os: [macos-13, windows-latest, ubuntu-latest]
steps:
- uses: actions/checkout@v4
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
# cache: 'pip' # caching pip dependencies

- name: Get current hash (SHA) of the ephy_testing_data repo
id: repo_hash
run: |
echo "dataset_hash=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)"
echo "dataset_hash=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)" >> $GITHUB_OUTPUT
shell: bash
- name: Cache datasets
id: cache-datasets
uses: actions/cache/restore@v4
with:
path: ~/spikeinterface_datasets
key: ${{ runner.os }}-datasets-${{ steps.repo_hash.outputs.dataset_hash }}
restore-keys: ${{ runner.os }}-datasets

- name: Install packages
run: |
git config --global user.email "[email protected]"
git config --global user.name "CI Almighty"
pip install -e .[test,extractors,streaming_extractors,full]
pip install tabulate
shell: bash

- name: Install datalad
run: |
pip install datalad-installer
if [ ${{ runner.os }} = 'Linux' ]; then
datalad-installer --sudo ok git-annex --method datalad/packages
elif [ ${{ runner.os }} = 'macOS' ]; then
datalad-installer --sudo ok git-annex --method brew
elif [ ${{ runner.os }} = 'Windows' ]; then
datalad-installer --sudo ok git-annex --method datalad/git-annex:release
fi
pip install datalad
git config --global filter.annex.process "git-annex filter-process" # recommended for efficiency
shell: bash

- name: Set execute permissions on run_tests.sh
run: chmod +x .github/run_tests.sh
shell: bash

- name: Test core
run: pytest -m "core"
shell: bash

- name: Test extractors
env:
HDF5_PLUGIN_PATH: ${{ github.workspace }}/hdf5_plugin_path_maxwell
run: pytest -m "extractors"
shell: bash

- name: Test preprocessing
run: ./.github/run_tests.sh "preprocessing and not deepinterpolation" --no-virtual-env
shell: bash

- name: Test postprocessing
run: ./.github/run_tests.sh postprocessing --no-virtual-env
shell: bash

- name: Test quality metrics
run: ./.github/run_tests.sh qualitymetrics --no-virtual-env
shell: bash

- name: Test comparison
run: ./.github/run_tests.sh comparison --no-virtual-env
shell: bash

- name: Test core sorters
run: ./.github/run_tests.sh sorters --no-virtual-env
shell: bash

- name: Test internal sorters
run: ./.github/run_tests.sh sorters_internal --no-virtual-env
shell: bash

- name: Test curation
run: ./.github/run_tests.sh curation --no-virtual-env
shell: bash

- name: Test widgets
run: ./.github/run_tests.sh widgets --no-virtual-env
shell: bash

- name: Test exporters
run: ./.github/run_tests.sh exporters --no-virtual-env
shell: bash

- name: Test sortingcomponents
run: ./.github/run_tests.sh sortingcomponents --no-virtual-env
shell: bash

- name: Test generation
run: ./.github/run_tests.sh generation --no-virtual-env
shell: bash
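
Two details here are easy to miss: the repo-hash step echoes the dataset SHA twice, once to the job log for visibility and once into GITHUB_OUTPUT so the cache step can key on it, and the workflow only restores the dataset cache (actions/cache/restore@v4, with restore-keys as a stale fallback); creating and saving the caches is handled by the cron job in the next file.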
68 changes: 25 additions & 43 deletions .github/workflows/caches_cron_job.yml
@@ -2,64 +2,35 @@ name: Create caches for gin ecephys data and virtual env

on:
workflow_dispatch:
push: # When someting is pushed into main this checks if caches need to re-created
push: # When something is pushed into main this checks if caches need to be re-created
branches:
- main
schedule:
- cron: "0 12 * * *" # Daily at noon UTC

jobs:



create-virtual-env-cache-if-missing:
name: Caching virtual env
runs-on: "ubuntu-latest"
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Get current year-month
id: date
run: |
echo "date=$(date +'%Y-%m')" >> $GITHUB_OUTPUT
- name: Get current dependencies hash
id: dependencies
run: |
echo "hash=${{hashFiles('**/pyproject.toml')}}" >> $GITHUB_OUTPUT
- uses: actions/cache@v4
id: cache-venv
with:
path: ${{ github.workspace }}/test_env
key: ${{ runner.os }}-venv-${{ steps.dependencies.outputs.hash }}-${{ steps.date.outputs.date }}
lookup-only: 'true' # Avoids downloading the data, saving behavior is not affected.
- name: Cache found?
run: echo "Cache-hit == ${{steps.cache-venv.outputs.cache-hit == 'true'}}"
- name: Create the virtual environment to be cached
if: steps.cache-venv.outputs.cache-hit != 'true'
uses: ./.github/actions/build-test-environment




create-gin-data-cache-if-missing:
name: Caching data env
runs-on: "ubuntu-latest"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
steps:
- uses: actions/setup-python@v5
with:
python-version: '3.10'
python-version: '3.11'
- name: Create the directory to store the data
run: |
mkdir --parents --verbose $HOME/spikeinterface_datasets/ephy_testing_data/
chmod -R 777 $HOME/spikeinterface_datasets
ls -l $HOME/spikeinterface_datasets
mkdir -p ~/spikeinterface_datasets/ephy_testing_data/
ls -l ~/spikeinterface_datasets
shell: bash
- name: Get current hash (SHA) of the ephy_testing_data repo
id: repo_hash
run: |
echo "dataset_hash=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)"
echo "dataset_hash=$(git ls-remote https://gin.g-node.org/NeuralEnsemble/ephy_testing_data.git HEAD | cut -f1)" >> $GITHUB_OUTPUT
shell: bash
- uses: actions/cache@v4
id: cache-datasets
with:
@@ -68,31 +39,42 @@ jobs:
lookup-only: 'true' # Avoids downloading the data, saving behavior is not affected.
- name: Cache found?
run: echo "Cache-hit == ${{steps.cache-datasets.outputs.cache-hit == 'true'}}"
shell: bash
- name: Installing datalad and git-annex
if: steps.cache-datasets.outputs.cache-hit != 'true'
run: |
git config --global user.email "[email protected]"
git config --global user.name "CI Almighty"
python -m pip install -U pip # Official recommended way
pip install datalad-installer
datalad-installer --sudo ok git-annex --method datalad/packages
if [ ${{ runner.os }} == 'Linux' ]; then
datalad-installer --sudo ok git-annex --method datalad/packages
elif [ ${{ runner.os }} == 'macOS' ]; then
datalad-installer --sudo ok git-annex --method brew
elif [ ${{ runner.os }} == 'Windows' ]; then
datalad-installer --sudo ok git-annex --method datalad/git-annex:release
fi
pip install datalad
git config --global filter.annex.process "git-annex filter-process" # recommended for efficiency
shell: bash
- name: Download dataset
if: steps.cache-datasets.outputs.cache-hit != 'true'
run: |
datalad install --recursive --get-data https://gin.g-node.org/NeuralEnsemble/ephy_testing_data
shell: bash
- name: Move the downloaded data to the right directory
if: steps.cache-datasets.outputs.cache-hit != 'true'
run: |
mv --force ./ephy_testing_data $HOME/spikeinterface_datasets/
mv ./ephy_testing_data ~/spikeinterface_datasets/
shell: bash
- name: Show size of the cache to assert data is downloaded
run: |
cd $HOME
cd ~
pwd
du -hs spikeinterface_datasets # Should show the size of ephy_testing_data
cd spikeinterface_datasets
pwd
ls -lh # Should show ephy_testing_data
cd ephy_testing_data
ls -lh
shell: bash
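
This cron job is the writing half of that scheme: it uses actions/cache@v4, which saves the cache when the lookup misses, keyed on the same gin dataset SHA, and it now runs across all three operating systems so each entry in the all-tests.yml matrix finds a per-OS cache to restore.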
7 changes: 3 additions & 4 deletions pyproject.toml
@@ -137,10 +137,9 @@ test = [

# for sortingview backend
"sortingview",

# recent datalad need a too recent version for git-annex
# so we use an old one here
"datalad==0.16.2",
# Download data
"pooch>=1.8.2",
"datalad>=1.0.2",

## install tridesclous for testing ##
"tridesclous>=1.6.8",
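The dependency change mirrors the new download strategy in datasets.py below: pooch (>=1.8.2) now performs the actual file downloads, so the datalad==0.16.2 pin, kept only for git-annex compatibility, can be relaxed to datalad>=1.0.2 for repository listing.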
56 changes: 40 additions & 16 deletions src/spikeinterface/core/datasets.py
@@ -14,56 +14,80 @@ def download_dataset(
remote_path: str = "mearec/mearec_test_10s.h5",
local_folder: Path | None = None,
update_if_exists: bool = False,
unlock: bool = False,
) -> Path:
"""
Function to download dataset from a remote repository using datalad.
Function to download dataset from a remote repository using a combination of datalad and pooch.
Pooch is designed to download single files from a remote repository.
Because our datasets in gin sometimes point just to a folder, we still use datalad to obtain
the list of files in the folder and then use pooch to download them one by one.
Parameters
----------
repo : str, default: "https://gin.g-node.org/NeuralEnsemble/ephy_testing_data"
The repository to download the dataset from
remote_path : str, default: "mearec/mearec_test_10s.h5"
A specific subdirectory in the repository to download (e.g. Mearec, SpikeGLX, etc)
local_folder : str, default: None
local_folder : str, optional
The destination folder / directory to download the dataset to.
defaults to the path "get_global_dataset_folder()" / f{repo_name} (see `spikeinterface.core.globals`)
if None, then the path "get_global_dataset_folder()" / f{repo_name} is used (see `spikeinterface.core.globals`)
update_if_exists : bool, default: False
Forces re-download of the dataset if it already exists, default: False
unlock : bool, default: False
Use to enable the edition of the downloaded file content, default: False
Returns
-------
Path
The local path to the downloaded dataset
Notes
-----
The reason we use pooch is that we have had problems with datalad failing to download
data on Windows machines, especially in CI.
See https://handbook.datalad.org/en/latest/intro/windows.html
"""
import pooch
import datalad.api
from datalad.support.gitrepo import GitRepo

if local_folder is None:
base_local_folder = get_global_dataset_folder()
base_local_folder.mkdir(exist_ok=True, parents=True)
local_folder = base_local_folder / repo.split("/")[-1]
local_folder.mkdir(exist_ok=True, parents=True)
else:
if not local_folder.is_dir():
local_folder.mkdir(exist_ok=True, parents=True)

local_folder = Path(local_folder)
if local_folder.exists() and GitRepo.is_valid_repo(local_folder):
dataset = datalad.api.Dataset(path=local_folder)
# make sure git repo is in clean state
repo = dataset.repo
if update_if_exists:
repo.call_git(["checkout", "--force", "master"])
dataset.update(merge=True)
else:
dataset = datalad.api.install(path=local_folder, source=repo)

local_path = local_folder / remote_path
dataset_status = dataset.status(path=remote_path, annex="simple")

# Download only files that also have a git-annex key
dataset_status_files = [status for status in dataset_status if status["type"] == "file"]
dataset_status_files = [status for status in dataset_status_files if "key" in status]

# This downloads the data set content
dataset.get(remote_path)
git_annex_hashing_algorithm = {"MD5E": "md5"}
for status in dataset_status_files:
hash_algorithm = git_annex_hashing_algorithm[status["backend"]]
hash = status["keyname"].split(".")[0]
known_hash = f"{hash_algorithm}:{hash}"
fname = Path(status["path"]).relative_to(local_folder)
url = f"{repo}/raw/master/{fname.as_posix()}"
expected_full_path = local_folder / fname

# Unlock files of a dataset in order to be able to edit the actual content
if unlock:
dataset.unlock(remote_path, recursive=True)
full_path = pooch.retrieve(
url=url,
fname=str(fname),
path=local_folder,
known_hash=known_hash,
progressbar=True,
)
assert full_path == str(expected_full_path)

return local_path
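
For illustration only (not part of the commit), a minimal sketch of calling the reworked function with its defaults; it assumes download_dataset is importable from spikeinterface.core:

from spikeinterface.core import download_dataset

# Uses the defaults from the signature above: the NeuralEnsemble gin repo and
# remote_path "mearec/mearec_test_10s.h5". Each file is fetched by pooch.retrieve
# and verified against the md5 embedded in its git-annex key: for the MD5E
# backend the keyname is "<md5>.<ext>", which the loop above turns into
# known_hash = "md5:<md5>".
local_path = download_dataset(update_if_exists=False)
print(local_path)  # <global dataset folder>/ephy_testing_data/mearec/mearec_test_10s.h5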
5 changes: 3 additions & 2 deletions src/spikeinterface/extractors/tests/common_tests.py
@@ -18,8 +18,9 @@ class CommonTestSuite:
downloads = []
entities = []

def setUp(self):
for remote_path in self.downloads:
@classmethod
def setUpClass(cls):
for remote_path in cls.downloads:
download_dataset(repo=gin_repo, remote_path=remote_path, local_folder=local_folder, update_if_exists=True)


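Moving the download from setUp to the setUpClass classmethod means the datasets listed in downloads are fetched once per test class rather than before every test method, avoiding a repeated hash check for every test now that each call re-validates files through pooch.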