-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
327 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,210 @@ | ||
# SPDX-License-Identifier: BSD-3-Clause | ||
# Copyright (c) 2024 SciCat Project (https://github.com/SciCatProject/scitacean) | ||
|
||
import pytest | ||
from dateutil.parser import parse as parse_datetime | ||
|
||
from scitacean import Client, DatasetType, RemotePath, model | ||
from scitacean.testing.backend import skip_if_not_backend | ||
from scitacean.testing.backend.config import SciCatAccess | ||
|
||
# NOTE(review): in this copy of the file every ``contactEmail`` had been replaced
# by the literal text "[email protected]" (e-mail obfuscation residue), which is
# not an address.  The original value cannot be recovered from here, so a
# syntactically valid placeholder is used; restore the real address if it matters.
# No test in this module filters on ``contactEmail``.
_CONTACT_EMAIL = "contact@example.com"

# Datasets uploaded once per module by the ``seed_database`` fixture.
# ``ownerGroup`` and ``owner`` are placeholders that the fixture overwrites
# with the values of the user it logs in as.
#
# The entries are chosen so that the queries below have distinct answers:
#   * raw1, raw2, raw3 share ``proposalId="p0124"``; raw4 has no proposal.
#   * raw1 and raw3 share ``principalInvestigator="investigator 1"``.
#   * derived1 and derived2 share ``investigator="investigator 1"``.
#   * ``creationTime`` orders the p0124 datasets as raw3 < raw1 < raw2.
UPLOAD_DATASETS = {
    "raw1": model.UploadRawDataset(
        ownerGroup="PLACEHOLDER",
        accessGroups=["uu", "faculty"],
        contactEmail=_CONTACT_EMAIL,
        creationTime=parse_datetime("2004-06-13T01:45:28.100Z"),
        datasetName="dataset 1",
        numberOfFiles=0,
        numberOfFilesArchived=0,
        owner="PLACEHOLDER",
        sourceFolder=RemotePath("/hex/raw1"),
        type=DatasetType.RAW,
        principalInvestigator="investigator 1",
        creationLocation="UU",
        proposalId="p0124",
    ),
    "raw2": model.UploadRawDataset(
        ownerGroup="PLACEHOLDER",
        accessGroups=["uu", "faculty"],
        contactEmail=_CONTACT_EMAIL,
        creationTime=parse_datetime("2004-06-14T14:00:30Z"),
        datasetName="dataset 2",
        numberOfFiles=0,
        numberOfFilesArchived=0,
        owner="PLACEHOLDER",
        sourceFolder=RemotePath("/hex/raw2"),
        type=DatasetType.RAW,
        principalInvestigator="investigator 2",
        creationLocation="UU",
        proposalId="p0124",
    ),
    "raw3": model.UploadRawDataset(
        ownerGroup="PLACEHOLDER",
        accessGroups=["uu", "faculty"],
        contactEmail=_CONTACT_EMAIL,
        creationTime=parse_datetime("2004-06-10T00:13:13Z"),
        datasetName="dataset 3",
        numberOfFiles=0,
        numberOfFilesArchived=0,
        owner="PLACEHOLDER",
        sourceFolder=RemotePath("/hex/raw3"),
        type=DatasetType.RAW,
        principalInvestigator="investigator 1",
        creationLocation="UU",
        proposalId="p0124",
    ),
    # Deliberately has no ``proposalId``.
    "raw4": model.UploadRawDataset(
        ownerGroup="PLACEHOLDER",
        accessGroups=["uu", "faculty"],
        contactEmail=_CONTACT_EMAIL,
        creationTime=parse_datetime("2005-11-03T21:56:02Z"),
        datasetName="dataset 1",
        numberOfFiles=0,
        numberOfFilesArchived=0,
        owner="PLACEHOLDER",
        sourceFolder=RemotePath("/hex/raw4"),
        type=DatasetType.RAW,
        principalInvestigator="investigator X",
        creationLocation="UU",
    ),
    "derived1": model.UploadDerivedDataset(
        ownerGroup="PLACEHOLDER",
        accessGroups=["uu", "faculty"],
        contactEmail=_CONTACT_EMAIL,
        creationTime=parse_datetime("2004-10-02T08:47:33Z"),
        datasetName="dataset 1",
        numberOfFiles=0,
        numberOfFilesArchived=0,
        owner="PLACEHOLDER",
        sourceFolder=RemotePath("/hex/derived1"),
        type=DatasetType.DERIVED,
        investigator="investigator 1",
        inputDatasets=[],
        usedSoftware=["scitacean"],
    ),
    "derived2": model.UploadDerivedDataset(
        ownerGroup="PLACEHOLDER",
        accessGroups=["uu", "faculty"],
        contactEmail=_CONTACT_EMAIL,
        creationTime=parse_datetime("2004-10-14T09:18:58Z"),
        datasetName="derived dataset 2",
        numberOfFiles=0,
        numberOfFilesArchived=0,
        owner="PLACEHOLDER",
        sourceFolder=RemotePath("/hex/derived2"),
        type=DatasetType.DERIVED,
        investigator="investigator 1",
        inputDatasets=[],
        usedSoftware=["scitacean"],
    ),
}
# Filled by ``seed_database``: maps each key of ``UPLOAD_DATASETS`` to the
# object returned by ``create_dataset_model`` for that upload.
SEED = {}
|
||
|
||
@pytest.fixture(scope="module", autouse=True)
def seed_database(request: pytest.FixtureRequest, scicat_access: SciCatAccess) -> None:
    """Upload every entry of ``UPLOAD_DATASETS`` and record the results in ``SEED``.

    Runs once per module (``scope="module"``) and for every test in it
    (``autouse=True``).  ``skip_if_not_backend`` is called before anything is
    uploaded -- presumably it skips when no backend is available; confirm
    against its implementation.

    The placeholder ``ownerGroup`` / ``owner`` of each upload model are
    overwritten in place with the group and username of ``scicat_access.user``.
    Whatever ``create_dataset_model`` returns is stored in ``SEED`` under the
    same key as the upload model.
    """
    skip_if_not_backend(request)

    user = scicat_access.user
    scicat = Client.from_credentials(
        url=scicat_access.url,
        **user.credentials,  # type: ignore[arg-type]
    ).scicat
    for name in UPLOAD_DATASETS:
        upload = UPLOAD_DATASETS[name]
        upload.ownerGroup = user.group
        upload.owner = user.username
        SEED[name] = scicat.create_dataset_model(upload)
|
||
|
||
def test_query_dataset_multiple_by_single_field(real_client, seed_database):
    """Filtering on ``proposalId`` alone yields all three seeded p0124 datasets."""
    found = real_client.scicat.query_datasets({"proposalId": "p0124"})
    # Compare keyed by pid so the order of the result does not matter.
    found_by_pid = {ds.pid: ds for ds in found}
    seeded = (SEED["raw1"], SEED["raw2"], SEED["raw3"])
    assert found_by_pid == {ds.pid: ds for ds in seeded}
|
||
|
||
def test_query_dataset_no_match(real_client, seed_database):
    """A query for owner ``librarian`` is expected to return nothing."""
    query = {"owner": "librarian"}
    assert not real_client.scicat.query_datasets(query)
|
||
|
||
def test_query_dataset_multiple_by_multiple_fields(real_client, seed_database):
    """Two filters together select only the datasets matching both.

    Of the p0124 datasets, raw1 and raw3 have
    ``principalInvestigator="investigator 1"``; raw2 does not.
    """
    query = {"proposalId": "p0124", "principalInvestigator": "investigator 1"}
    found = real_client.scicat.query_datasets(query)
    found_by_pid = {ds.pid: ds for ds in found}
    seeded = (SEED["raw1"], SEED["raw3"])
    assert found_by_pid == {ds.pid: ds for ds in seeded}
|
||
|
||
def test_query_dataset_multiple_by_derived_field(real_client, seed_database):
    """Filtering on ``investigator`` (set only on the derived uploads) finds both."""
    query = {"investigator": "investigator 1"}
    found = real_client.scicat.query_datasets(query)
    found_by_pid = {ds.pid: ds for ds in found}
    seeded = (SEED["derived1"], SEED["derived2"])
    assert found_by_pid == {ds.pid: ds for ds in seeded}
|
||
|
||
def test_query_dataset_uses_conjunction_of_fields(real_client, seed_database):
    """Filters are AND-ed: a matching ``proposalId`` alone is not enough.

    raw1, raw2 and raw3 match the proposal, but no seeded dataset has
    ``investigator="investigator X"``, so the combined query must be empty.
    If the filters were OR-ed, the p0124 datasets would be returned.
    """
    query = {"proposalId": "p0124", "investigator": "investigator X"}
    assert not real_client.scicat.query_datasets(query)
|
||
|
||
def test_query_dataset_can_use_custom_type(real_client, seed_database):
    """A ``RemotePath`` instance is accepted as a filter value.

    Only raw4 was uploaded with ``sourceFolder`` ``/hex/raw4``.
    """
    folder = RemotePath("/hex/raw4")
    found = real_client.scicat.query_datasets({"sourceFolder": folder})
    assert found == [SEED["raw4"]]
|
||
|
||
def test_query_dataset_set_order(real_client, seed_database):
    """``order="creationTime:desc"`` returns the p0124 datasets newest first."""
    # Seeded creation times: raw2 (2004-06-14) > raw1 (2004-06-13) > raw3 (2004-06-10).
    newest_first = ("raw2", "raw1", "raw3")
    found = real_client.scicat.query_datasets(
        {"proposalId": "p0124"}, order="creationTime:desc"
    )
    # Compared as lists on purpose: the order is what is under test.
    assert found == [SEED[name] for name in newest_first]
|
||
|
||
def test_query_dataset_limit_ascending_creation_time(real_client, seed_database):
    """``limit=2`` with ascending ``creationTime`` keeps the two oldest datasets.

    Among the p0124 datasets those are raw3 and raw1.
    """
    found = real_client.scicat.query_datasets(
        {"proposalId": "p0124"}, order="creationTime:asc", limit=2
    )
    found_by_pid = {ds.pid: ds for ds in found}
    oldest_two = (SEED["raw1"], SEED["raw3"])
    assert found_by_pid == {ds.pid: ds for ds in oldest_two}
|
||
|
||
def test_query_dataset_limit_descending_creation_time(real_client, seed_database):
    """``limit=2`` with descending ``creationTime`` keeps the two newest datasets.

    Among the p0124 datasets those are raw2 and raw1.
    """
    found = real_client.scicat.query_datasets(
        {"proposalId": "p0124"}, order="creationTime:desc", limit=2
    )
    found_by_pid = {ds.pid: ds for ds in found}
    newest_two = (SEED["raw1"], SEED["raw2"])
    assert found_by_pid == {ds.pid: ds for ds in newest_two}
|
||
|
||
def test_query_dataset_limit_needs_order(real_client, seed_database):
    """Passing ``limit`` without ``order`` must raise a ``ValueError`` mentioning "limit"."""
    query = {"proposalId": "p0124"}
    with pytest.raises(ValueError, match="limit"):
        real_client.scicat.query_datasets(query, limit=2)
|
||
|
||
def test_query_dataset_all(real_client, seed_database):
    """An empty query returns (at least) every dataset seeded by this module."""
    found_by_pid = {
        found.pid: found for found in real_client.scicat.query_datasets({})
    }
    # Other tests put their own datasets into the same database, so the full
    # result cannot be compared; only check that each seeded dataset is
    # present and equal to what was uploaded.
    for seeded in SEED.values():
        assert found_by_pid[seeded.pid] == seeded