Experiment: Get new models from SciCat
jl-wynen committed Oct 30, 2024
1 parent d02ced7 commit defa231
Showing 8 changed files with 154 additions and 78 deletions.
170 changes: 122 additions & 48 deletions src/scitacean/_dataset_fields.py

Large diffs are not rendered by default.

35 changes: 20 additions & 15 deletions src/scitacean/model.py
@@ -101,14 +101,11 @@
 from .thumbnail import Thumbnail


-class DownloadDataset(
-    BaseModel, masked=("attachments", "datablocks", "history", "origdatablocks")
-):
+class DownloadDataset(BaseModel, masked=("history",)):
     contactEmail: str | None = None
     creationLocation: str | None = None
     creationTime: datetime | None = None
     inputDatasets: list[PID] | None = None
-    investigator: str | None = None
     numberOfFilesArchived: NonNegativeInt | None = None
     owner: str | None = None
     ownerGroup: str | None = None
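
Note on the `masked=` change above: the keyword shrinks from four fields to one because `attachments`, `datablocks`, and `origdatablocks` no longer need masking under the new schemas (see the masked-fields.yml change further down). A minimal sketch of how such a class keyword can be consumed, using plain Python rather than scitacean's actual `BaseModel` internals (which wrap pydantic):

```python
# Sketch only: extra class keywords are delivered to __init_subclass__,
# so the base model can record which SciCat fields to drop.
class BaseModel:
    _masked_fields: tuple[str, ...] = ()

    def __init_subclass__(cls, masked: tuple[str, ...] = (), **kwargs) -> None:
        super().__init_subclass__(**kwargs)
        cls._masked_fields = masked


class DownloadDataset(BaseModel, masked=("history",)):
    pass


assert DownloadDataset._masked_fields == ("history",)
```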
@@ -127,7 +124,7 @@ class DownloadDataset(
     description: str | None = None
     endTime: datetime | None = None
     instrumentGroup: str | None = None
-    instrumentId: str | None = None
+    instrumentIds: list[str] | None = None
     isPublished: bool | None = None
     jobLogData: str | None = None
     jobParameters: dict[str, Any] | None = None
@@ -141,12 +138,13 @@
     ownerEmail: str | None = None
     packedSize: NonNegativeInt | None = None
     pid: PID | None = None
-    proposalId: str | None = None
+    proposalIds: list[str] | None = None
     relationships: list[DownloadRelationship] | None = None
-    sampleId: str | None = None
+    sampleIds: list[str] | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
     sourceFolderHost: str | None = None
+    startTime: datetime | None = None
     techniques: list[DownloadTechnique] | None = None
     updatedAt: datetime | None = None
     updatedBy: str | None = None
@@ -195,6 +193,7 @@ class UploadDerivedDataset(BaseModel):
     orcidOfOwner: str | None = None
     ownerEmail: str | None = None
     packedSize: NonNegativeInt | None = None
+    proposalId: str | None = None
     relationships: list[UploadRelationship] | None = None
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
@@ -219,12 +218,15 @@ class UploadRawDataset(BaseModel):
     contactEmail: str
     creationLocation: str
     creationTime: datetime
+    inputDatasets: list[PID]
+    investigator: str
     numberOfFilesArchived: NonNegativeInt
     owner: str
     ownerGroup: str
     principalInvestigator: str
     sourceFolder: RemotePath
     type: DatasetType
+    usedSoftware: list[str]
     accessGroups: list[str] | None = None
     classification: str | None = None
     comment: str | None = None
@@ -235,6 +237,8 @@ class UploadRawDataset(BaseModel):
     instrumentGroup: str | None = None
     instrumentId: str | None = None
     isPublished: bool | None = None
+    jobLogData: str | None = None
+    jobParameters: dict[str, Any] | None = None
     keywords: list[str] | None = None
     license: str | None = None
     scientificMetadata: dict[str, Any] | None = None
@@ -249,6 +253,7 @@ class UploadRawDataset(BaseModel):
     sharedWith: list[str] | None = None
     size: NonNegativeInt | None = None
     sourceFolderHost: str | None = None
+    startTime: datetime | None = None
     techniques: list[UploadTechnique] | None = None
     validationStatus: str | None = None

@@ -316,13 +321,13 @@ def download_model_type(cls) -> type[DownloadAttachment]:

 class DownloadOrigDatablock(BaseModel):
     dataFileList: list[DownloadDataFile] | None = None
-    datasetId: PID | None = None
     size: NonNegativeInt | None = None
     id: str | None = pydantic.Field(alias="_id", default=None)
     accessGroups: list[str] | None = None
     chkAlg: str | None = None
     createdAt: datetime | None = None
     createdBy: str | None = None
+    datasetId: PID | None = None
     instrumentGroup: str | None = None
     isPublished: bool | None = None
     ownerGroup: str | None = None
@@ -472,9 +477,9 @@ def download_model_type(cls) -> type[DownloadRelationship]:


 class DownloadHistory(BaseModel):
-    id: str | None = pydantic.Field(alias="_id", default=None)
+    id: str | None = None
     updatedAt: datetime | None = None
-    updatedBy: datetime | None = None
+    updatedBy: str | None = None

     @pydantic.field_validator("updatedAt", mode="before")
     def _validate_datetime(cls, value: Any) -> Any:
@@ -764,20 +769,20 @@ def download_model_type(cls) -> type[DownloadRelationship]:

 @dataclass(kw_only=True, slots=True)
 class History(BaseUserModel):
-    __id: str | None = None
+    _id: str | None = None
     _updated_at: datetime | None = None
-    _updated_by: datetime | None = None
+    _updated_by: str | None = None

     @property
-    def _id(self) -> str | None:
-        return self.__id
+    def id(self) -> str | None:
+        return self._id

     @property
     def updated_at(self) -> datetime | None:
         return self._updated_at

     @property
-    def updated_by(self) -> datetime | None:
+    def updated_by(self) -> str | None:
         return self._updated_by

     @classmethod
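Note on the `History` change above: a field named `__id` is subject to name mangling inside the class body, so from outside it is only reachable as `_History__id`, which is what forced the old `_id` property workaround. The new code stores `_id` and exposes a public `id` property instead, and also fixes the type of `updated_by` from `datetime` to `str`. A small sketch of the mangling effect, using a hypothetical stand-in class:

```python
from dataclasses import dataclass


@dataclass(kw_only=True, slots=True)
class History:
    __id: str | None = None  # mangled: stored and initialized as `_History__id`


h = History(_History__id="abc")   # works, but leaks the mangled name
# History(__id="abc")             # TypeError: unexpected keyword argument '__id'
```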
8 changes: 4 additions & 4 deletions tests/client/dataset_client_test.py
@@ -43,7 +43,7 @@ def derived_dataset(scicat_access):
 @pytest.mark.parametrize("key", ["raw", "derived"])
 def test_get_dataset_model(scicat_client, key):
     dset = INITIAL_DATASETS[key]
-    downloaded = scicat_client.get_dataset_model(dset.pid)
+    downloaded = scicat_client.get_dataset_model(dset.pid, strict_validation=True)
     # The backend may update the dataset after upload.
     # We cannot easily predict when that happens.
     downloaded.updatedAt = dset.updatedAt
@@ -57,7 +57,7 @@ def test_get_dataset_model_bad_id(scicat_client):

 def test_create_dataset_model(scicat_client, derived_dataset):
     finalized = scicat_client.create_dataset_model(derived_dataset)
-    downloaded = scicat_client.get_dataset_model(finalized.pid)
+    downloaded = scicat_client.get_dataset_model(finalized.pid, strict_validation=True)
     for key, expected in finalized:
         # The database populates a number of fields that are None in dset.
         # But we don't want to test those here as we don't want to test the database.
@@ -75,7 +75,7 @@ def test_validate_dataset_model(real_client, require_scicat_backend, derived_dataset):
 def test_get_dataset(client):
     dset = INITIAL_DATASETS["raw"]
     dblock = INITIAL_ORIG_DATABLOCKS["raw"][0]
-    downloaded = client.get_dataset(dset.pid)
+    downloaded = client.get_dataset(dset.pid, strict_validation=True)

     assert downloaded.source_folder == dset.sourceFolder
     assert downloaded.creation_time == dset.creationTime
@@ -96,7 +96,7 @@ def test_can_get_public_dataset_without_login(require_scicat_backend, scicat_access):

     dset = INITIAL_DATASETS["public"]
     dblock = INITIAL_ORIG_DATABLOCKS["public"][0]
-    downloaded = client.get_dataset(dset.pid)
+    downloaded = client.get_dataset(dset.pid, strict_validation=True)

     assert downloaded.source_folder == dset.sourceFolder
     assert downloaded.creation_time == dset.creationTime
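The tests above now opt into strict validation when fetching datasets. A usage sketch, assuming a reachable backend; the URL, token, and PID below are placeholders:

```python
import pydantic
from scitacean import Client

# Placeholders: swap in a real SciCat URL, token, and dataset PID.
client = Client.from_token(url="https://scicat.example.com/api/v3", token="...")
try:
    # strict_validation=True makes a response that does not match the
    # generated models fail loudly instead of being parsed leniently.
    dset = client.get_dataset("20.500.12269/some-pid", strict_validation=True)
except pydantic.ValidationError as err:
    print("Dataset does not match the generated models:", err)
```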
2 changes: 1 addition & 1 deletion tools/model-generation/README.md
@@ -24,7 +24,7 @@ python generate_models.py --launch-scicat
 ```

 This overwrites the relevant files in the source directory.
-If will clean up the docker resources afterwards.
+It will clean up the docker resources afterward.

 See `generate_models.py` for options to configure the schema URL and output file paths.

4 changes: 2 additions & 2 deletions tools/model-generation/spec/__init__.py
@@ -160,8 +160,8 @@ def _collect_schemas(
 ) -> dict[str, _UpDownSchemas | _DatasetSchemas]:
     return {
         "Dataset": _DatasetSchemas(
-            upload_derived=schemas["CreateDerivedDatasetDto"],
-            upload_raw=schemas["CreateRawDatasetDto"],
+            upload_derived=schemas["CreateDerivedDatasetObsoleteDto"],
+            upload_raw=schemas["CreateRawDatasetObsoleteDto"],
             download=schemas["DatasetClass"],
         ),
         **{
3 changes: 0 additions & 3 deletions tools/model-generation/spec/masked-fields.yml
@@ -4,8 +4,5 @@
 # what model to mask it in.
 # Field names must be SciCat names (camelCase).
 Dataset:
-  - attachments
-  - datablocks
   - history # because history is dropped (see field-validations.yml)
-  - origdatablocks
   - datasetlifecycle: upload
8 changes: 4 additions & 4 deletions tools/model-generation/spec/schema.py
@@ -30,10 +30,10 @@ def parse_field_type(spec: dict[str, Any]):
         return parse_field_type(spec["allOf"][0])
     if "$ref" in spec:
         return spec["$ref"].rsplit("/", 1)[1]
-    if "enum" in spec:
-        if spec["type"] != "string":
-            raise ValueError(f"Enum fields must have type 'string', got: {spec}")
-        return "Enum[" + ", ".join(spec["enum"]) + "]"
+    # if "enum" in spec:
+    #     if spec["type"] != "string":
+    #         raise ValueError(f"Enum fields must have type 'string', got: {spec}")
+    #     return "Enum[" + ", ".join(spec["enum"]) + "]"
     if spec["type"] == "number":
         return "int"
     if spec["type"] == "string":
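With the enum branch commented out, enum-typed fields take a different path through `parse_field_type`. Assuming the remaining branches behave as the visible context suggests (the full function is not in this diff), a string enum now maps to a plain `str`:

```python
# Hypothetical OpenAPI property spec for an enum field such as `type`:
spec = {"type": "string", "enum": ["raw", "derived"]}

# Before this change: parse_field_type(spec) returned "Enum[raw, derived]".
# After: the enum check is skipped, so the spec falls through to the
# `spec["type"] == "string"` branch and the field is generated as `str`.
```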
2 changes: 1 addition & 1 deletion tools/model-generation/templates/model.py.jinja
@@ -12,7 +12,7 @@

 {% macro mask_keyword(spec, kind) %}
 {% if kind == "download" and spec.masked_fields_download %}
-, masked=({{ spec.masked_fields_download|map("quote")|join(", ") }})
+, masked=({{ spec.masked_fields_download|map("quote")|join(", ") }},)
 {% endif %}
 {% endmacro %}
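
The added trailing comma is the whole fix: in Python, parentheses alone do not make a one-element tuple, so a single masked field used to render as a plain string:

```python
masked = ("history")   # parentheses only group: this is the str "history"
masked = ("history",)  # the trailing comma makes it a 1-tuple
assert isinstance(("history"), str)
assert isinstance(("history",), tuple)
```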

