Skip to content

Commit

Permalink
Cache generation tests
Browse files Browse the repository at this point in the history
  • Loading branch information
k1o0 committed Nov 15, 2024
1 parent 78d347e commit f3f1ee3
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 3 deletions.
2 changes: 1 addition & 1 deletion alyx/experiments/tests_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def setUp(self):
self.client.login(username='test', password='test')
# self.session = Session.objects.first()
lab = Lab.objects.create(name='lab')
subject = Subject.objects.create(name='586', lab=lab)
subject = Subject.objects.create(nickname='586', lab=lab)
self.session = Session.objects.create(subject=subject, number=1)
# need to add imaging procedure
self.session.procedures.add(ProcedureType.objects.get_or_create(name='Imaging')[0])
Expand Down
2 changes: 1 addition & 1 deletion alyx/jobs/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def setUp(self) -> None:
Task.objects.create(name=f'task_{i}', status=status)
# Create a session for testing signed-off filter
lab = Lab.objects.create(name='lab')
subject = Subject.objects.create(name='586', lab=lab)
subject = Subject.objects.create(nickname='586', lab=lab)
json_data = {'sign_off_checklist': {'sign_off_date': datetime.today().isoformat()}}
self.session = Session.objects.create(
subject=subject, number=1, json=json_data, type='Experiment')
Expand Down
2 changes: 1 addition & 1 deletion alyx/misc/management/commands/one_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ def generate_datasets_frame(tags=None, batch_size=100_000) -> pd.DataFrame:
df = (pd.DataFrame
.from_records(current_qs.values(*fields))
.rename(fields_map, axis=1)
.astype({'id': str, 'eid': str, 'file_size': np.uint64}))
.astype({'id': str, 'eid': str, 'file_size': 'UInt64'}))
df['exists'] = True

# relative_path
Expand Down
64 changes: 64 additions & 0 deletions alyx/misc/tests.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
import zipfile
from pathlib import Path
from datetime import datetime, timedelta
import tempfile
import unittest
from django.test import TestCase
from one.alf.spec import QC
from one.alf.cache import DATASETS_COLUMNS, SESSIONS_COLUMNS
import pandas as pd

from subjects.models import Subject
from misc.models import Housing, HousingSubject, CageType, LabMember, Lab
from actions.models import Session
from data.models import Dataset, DatasetType, DataRepository, FileRecord, DataFormat

SKIP_ONE_CACHE = False
try:
Expand Down Expand Up @@ -145,6 +153,62 @@ def test_move_subject(self):
@unittest.skipIf(SKIP_ONE_CACHE, 'Missing dependencies')
class ONECache(TestCase):
"""Tests for misc.management.commands.one_cache"""
fixtures = [
'data.datarepositorytype.json', 'data.datasettype.json',
'data.dataformat.json', 'misc.lab.json'
]

def setUp(self):
    """Prepare the command under test, a scratch directory and some database records.

    Creates one subject, one data repository and five sessions; every session gets
    two datasets, each with a single existing file record on that repository.
    """
    self.command = one_cache.Command()

    # Scratch output directory, removed again when the test finishes
    scratch = tempfile.TemporaryDirectory()
    self.addCleanup(scratch.cleanup)
    self.tmp = Path(scratch.name)

    # Shared records: the lab comes from the fixtures
    lab = Lab.objects.first()
    subject = Subject.objects.create(nickname='586', lab=lab)
    repository = DataRepository.objects.create(
        name='flatiron', globus_path='foo', lab=lab, globus_is_personal=True)

    filenames = ('foo.bar.npy', 'bar.baz.bin')
    for number in range(1, 6):
        session = Session.objects.create(
            subject=subject, number=number, type='Experiment', task_protocol='foo', qc=QC.PASS)
        # First session's datasets have no file size (0 evaluates to None)
        # NOTE(review): presumably this exercises the nullable file_size column - confirm
        size = (1024 * (number - 1)) or None
        session_path = (f'{session.subject.nickname}/{session.start_time.date()}'
                        f'/{session.number:03d}')
        for filename in filenames:
            parts = Path(filename)
            dataset_type, _ = DatasetType.objects.get_or_create(name=parts.stem)
            data_format = DataFormat.objects.get(name=parts.suffix[1:])
            dataset = Dataset.objects.create(
                session=session, dataset_type=dataset_type, collection='alf', qc=QC.PASS,
                name=filename, data_format=data_format, file_size=size)
            FileRecord.objects.create(
                relative_path=f'{session_path}/alf/{filename}', dataset=dataset,
                data_repository=repository, exists=True)

def test_generate_tables(self):
    """Test ONE cache table generation.

    Checks that an unknown table name raises a ValueError, that the sessions and
    datasets tables are written as parquet files with the expected columns, and that
    the compressed output with QC enabled contains the expected archive members.
    """
    # Check table name validation
    with self.assertRaises(ValueError):
        self.command.handle(verbosity=1, tables=('foo',))

    # Check table generation
    self.command.handle(
        destination=str(self.tmp), compress=False, verbosity=1,
        tables=('sessions', 'datasets')
    )
    self.assertCountEqual(
        ['date_created', 'origin', 'min_api_version'], self.command.metadata)
    tables = sorted(self.tmp.glob('*.pqt'))
    self.assertEqual(len(tables), 2)
    # Sorted alphabetically, so the datasets table precedes the sessions table
    datasets, sessions = pd.read_parquet(tables[0]), pd.read_parquet(tables[1])
    self.assertCountEqual(
        datasets.reset_index().columns, DATASETS_COLUMNS + ('default_revision',))
    self.assertTrue(all(datasets['rel_path'].str.startswith('alf/')))
    self.assertCountEqual(sessions.reset_index().columns, SESSIONS_COLUMNS)

    # Test QC and compression
    self.command.handle(
        destination=str(self.tmp), compress=True, verbosity=1, tables=('sessions',), qc=True)
    zip_file = self.tmp / 'cache.zip'
    self.assertTrue(zip_file.exists())
    cache_info = self.tmp / 'cache_info.json'
    self.assertTrue(cache_info.exists())
    # Close the archive deterministically: an open handle would leak and may prevent
    # the temporary directory from being removed on some platforms
    with zipfile.ZipFile(zip_file) as archive:
        members = archive.namelist()
    self.assertCountEqual(['sessions.pqt', 'cache_info.json', 'QC.json'], members)

def test_s3_filesystem(self):
"""Test the _s3_filesystem function"""
Expand Down

0 comments on commit f3f1ee3

Please sign in to comment.