Work around schema inconsistencies

SciCatProject · Dec 16, 2024 · bf29ac8
1 parent 0ad5b52
commit bf29ac8
Show file tree

Hide file tree

Showing 10 changed files with 76 additions and 77 deletions.
diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py
@@ -99,7 +99,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         ),
         Field(
             name="access_groups",
-            description="List of groups which have access to this item.",
+            description="Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.",
             read_only=False,
             required=False,
             scicat_name="accessGroups",
@@ -199,7 +199,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         ),
         Field(
             name="data_quality_metrics",
-            description="Data Quality Metrics is a number given by the user to rate the dataset.",
+            description="Data Quality Metrics given by the user to rate the dataset.",
             read_only=False,
             required=False,
             scicat_name="dataQualityMetrics",
@@ -239,7 +239,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         ),
         Field(
             name="instrument_group",
-            description="Group of the instrument which this item was acquired on.",
+            description="Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.",
             read_only=False,
             required=False,
             scicat_name="instrumentGroup",
@@ -248,12 +248,12 @@ def used_by(self, dataset_type: DatasetType) -> bool:
             used_by_raw=True,
         ),
         Field(
-            name="instrument_ids",
+            name="instrument_id",
             description="ID of the instrument where the data was created.",
             read_only=False,
             required=False,
-            scicat_name="instrumentIds",
-            type=list[str],
+            scicat_name="instrumentId",
+            type=str,
             used_by_derived=False,
             used_by_raw=True,
         ),
@@ -379,7 +379,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         ),
         Field(
             name="owner_group",
-            description="Name of the group owning this item.",
+            description="Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151",
             read_only=False,
             required=True,
             scicat_name="ownerGroup",
@@ -389,7 +389,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         ),
         Field(
             name="pid",
-            description="Persistent identifier of the dataset.",
+            description="Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/",
             read_only=True,
             required=False,
             scicat_name="pid",
@@ -408,10 +408,20 @@ def used_by(self, dataset_type: DatasetType) -> bool:
             used_by_raw=True,
         ),
         Field(
-            name="proposal_ids",
+            name="proposal_id",
             description="The ID of the proposal to which the dataset belongs.",
             read_only=False,
             required=False,
+            scicat_name="proposalId",
+            type=str,
+            used_by_derived=True,
+            used_by_raw=True,
+        ),
+        Field(
+            name="proposal_ids",
+            description="The ID of the proposal to which the dataset belongs to and it has been acquired under.",
+            read_only=True,
+            required=False,
             scicat_name="proposalIds",
             type=list[str],
             used_by_derived=True,
@@ -428,22 +438,12 @@ def used_by(self, dataset_type: DatasetType) -> bool:
             used_by_raw=True,
         ),
         Field(
-            name="run_number",
-            description="Run number assigned by the system to the data acquisition for the current dataset.",
-            read_only=False,
-            required=False,
-            scicat_name="runNumber",
-            type=str,
-            used_by_derived=True,
-            used_by_raw=True,
-        ),
-        Field(
-            name="sample_ids",
+            name="sample_id",
             description="ID of the sample used when collecting the data.",
             read_only=False,
             required=False,
-            scicat_name="sampleIds",
-            type=list[str],
+            scicat_name="sampleId",
+            type=str,
             used_by_derived=False,
             used_by_raw=True,
         ),
@@ -565,6 +565,7 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         "_end_time",
         "_input_datasets",
         "_instrument_group",
+        "_instrument_id",
         "_instrument_ids",
         "_investigator",
         "_is_published",
@@ -580,9 +581,10 @@ def used_by(self, dataset_type: DatasetType) -> bool:
         "_owner_group",
         "_pid",
         "_principal_investigator",
+        "_proposal_id",
         "_proposal_ids",
         "_relationships",
-        "_run_number",
+        "_sample_id",
         "_sample_ids",
         "_shared_with",
         "_source_folder",
@@ -615,7 +617,7 @@ def __init__(
         end_time: datetime | None = None,
         input_datasets: list[PID] | None = None,
         instrument_group: str | None = None,
-        instrument_ids: list[str] | None = None,
+        instrument_id: str | None = None,
         investigator: str | None = None,
         is_published: bool | None = None,
         job_log_data: str | None = None,
@@ -628,10 +630,9 @@ def __init__(
         owner_email: str | None = None,
         owner_group: str | None = None,
         principal_investigator: str | None = None,
-        proposal_ids: list[str] | None = None,
+        proposal_id: str | None = None,
         relationships: list[Relationship] | None = None,
-        run_number: str | None = None,
-        sample_ids: list[str] | None = None,
+        sample_id: str | None = None,
         shared_with: list[str] | None = None,
         source_folder: RemotePath | str | None = None,
         source_folder_host: str | None = None,
@@ -655,7 +656,7 @@ def __init__(
         self._end_time = end_time
         self._input_datasets = input_datasets
         self._instrument_group = instrument_group
-        self._instrument_ids = instrument_ids
+        self._instrument_id = instrument_id
         self._investigator = investigator
         self._is_published = is_published
         self._job_log_data = job_log_data
@@ -668,10 +669,9 @@ def __init__(
         self._owner_email = owner_email
         self._owner_group = owner_group
         self._principal_investigator = principal_investigator
-        self._proposal_ids = proposal_ids
+        self._proposal_id = proposal_id
         self._relationships = relationships
-        self._run_number = run_number
-        self._sample_ids = sample_ids
+        self._sample_id = sample_id
         self._shared_with = shared_with
         self._source_folder = _parse_remote_path(source_folder)
         self._source_folder_host = source_folder_host
@@ -698,12 +698,12 @@ def __init__(
 
     @property
     def access_groups(self) -> list[str] | None:
-        """List of groups which have access to this item."""
+        """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users."""
         return self._access_groups
 
     @access_groups.setter
     def access_groups(self, access_groups: list[str] | None) -> None:
-        """List of groups which have access to this item."""
+        """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users."""
         self._access_groups = access_groups
 
     @property
@@ -783,12 +783,12 @@ def data_format(self, data_format: str | None) -> None:
 
     @property
     def data_quality_metrics(self) -> int | None:
-        """Data Quality Metrics is a number given by the user to rate the dataset."""
+        """Data Quality Metrics given by the user to rate the dataset."""
         return self._data_quality_metrics
 
     @data_quality_metrics.setter
     def data_quality_metrics(self, data_quality_metrics: int | None) -> None:
-        """Data Quality Metrics is a number given by the user to rate the dataset."""
+        """Data Quality Metrics given by the user to rate the dataset."""
         self._data_quality_metrics = data_quality_metrics
 
     @property
@@ -823,23 +823,23 @@ def input_datasets(self, input_datasets: list[PID] | None) -> None:
 
     @property
     def instrument_group(self) -> str | None:
-        """Group of the instrument which this item was acquired on."""
+        """Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data."""
         return self._instrument_group
 
     @instrument_group.setter
     def instrument_group(self, instrument_group: str | None) -> None:
-        """Group of the instrument which this item was acquired on."""
+        """Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data."""
         self._instrument_group = instrument_group
 
     @property
-    def instrument_ids(self) -> list[str] | None:
+    def instrument_id(self) -> str | None:
         """ID of the instrument where the data was created."""
-        return self._instrument_ids
+        return self._instrument_id
 
-    @instrument_ids.setter
-    def instrument_ids(self, instrument_ids: list[str] | None) -> None:
+    @instrument_id.setter
+    def instrument_id(self, instrument_id: str | None) -> None:
         """ID of the instrument where the data was created."""
-        self._instrument_ids = instrument_ids
+        self._instrument_id = instrument_id
 
     @property
     def instrument_ids(self) -> list[str] | None:
@@ -953,17 +953,17 @@ def owner_email(self, owner_email: str | None) -> None:
 
     @property
     def owner_group(self) -> str | None:
-        """Name of the group owning this item."""
+        """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151"""
         return self._owner_group
 
     @owner_group.setter
     def owner_group(self, owner_group: str | None) -> None:
-        """Name of the group owning this item."""
+        """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151"""
         self._owner_group = owner_group
 
     @property
     def pid(self) -> PID | None:
-        """Persistent identifier of the dataset."""
+        """Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/"""
         return self._pid
 
     @property
@@ -977,14 +977,14 @@ def principal_investigator(self, principal_investigator: str | None) -> None:
         self._principal_investigator = principal_investigator
 
     @property
-    def proposal_ids(self) -> list[str] | None:
+    def proposal_id(self) -> str | None:
         """The ID of the proposal to which the dataset belongs."""
-        return self._proposal_ids
+        return self._proposal_id
 
-    @proposal_ids.setter
-    def proposal_ids(self, proposal_ids: list[str] | None) -> None:
+    @proposal_id.setter
+    def proposal_id(self, proposal_id: str | None) -> None:
         """The ID of the proposal to which the dataset belongs."""
-        self._proposal_ids = proposal_ids
+        self._proposal_id = proposal_id
 
     @property
     def proposal_ids(self) -> list[str] | None:
@@ -1002,24 +1002,14 @@ def relationships(self, relationships: list[Relationship] | None) -> None:
         self._relationships = relationships
 
     @property
-    def run_number(self) -> str | None:
-        """Run number assigned by the system to the data acquisition for the current dataset."""
-        return self._run_number
-
-    @run_number.setter
-    def run_number(self, run_number: str | None) -> None:
-        """Run number assigned by the system to the data acquisition for the current dataset."""
-        self._run_number = run_number
-
-    @property
-    def sample_ids(self) -> list[str] | None:
+    def sample_id(self) -> str | None:
         """ID of the sample used when collecting the data."""
-        return self._sample_ids
+        return self._sample_id
 
-    @sample_ids.setter
-    def sample_ids(self, sample_ids: str | None) -> None:
+    @sample_id.setter
+    def sample_id(self, sample_id: str | None) -> None:
         """ID of the sample used when collecting the data."""
-        self._sample_ids = sample_ids
+        self._sample_id = sample_id
 
     @property
     def sample_ids(self) -> list[str] | None:
@@ -1130,7 +1120,9 @@ def _prepare_fields_from_download(
         for field in DatasetBase._FIELD_SPEC:
             if field.read_only:
                 read_only["_" + field.name] = getattr(download_model, field.scicat_name)
-            else:
+            elif hasattr(
+                download_model, field.scicat_name
+            ):  # TODO remove condition in API v4
                 init_args[field.name] = getattr(download_model, field.scicat_name)
 
         init_args["meta"] = download_model.scientificMetadata

diff --git a/src/scitacean/model.py b/src/scitacean/model.py
@@ -219,14 +219,14 @@ class UploadRawDataset(BaseModel):
     creationLocation: str
     creationTime: datetime
     inputDatasets: list[PID]
-    investigator: str | None = None
     numberOfFilesArchived: NonNegativeInt
     owner: str
     ownerGroup: str
     principalInvestigator: str
     sourceFolder: RemotePath
     type: DatasetType
     usedSoftware: list[str]
+    investigator: str | None = None
     accessGroups: list[str] | None = None
     classification: str | None = None
     comment: str | None = None

diff --git a/src/scitacean/testing/backend/seed.py b/src/scitacean/testing/backend/seed.py
@@ -48,6 +48,7 @@
         ownerEmail="[email protected]",
         sourceFolder=RemotePath("/hex/data/123"),
         type=DatasetType.RAW,
+        investigator="Ponder Stibbons",
         principalInvestigator="Ponder Stibbons",
         creationLocation=SITE,
         techniques=[UploadTechnique(pid="DM666", name="dark_magic")],
@@ -95,6 +96,7 @@
         ownerEmail="[email protected]",
         sourceFolder=RemotePath("/hex/secret/stuff"),
         type=DatasetType.RAW,
+        investigator="Mustrum Ridcully",
         principalInvestigator="Mustrum Ridcully",
         creationLocation=SITE,
         techniques=[UploadTechnique(pid="S", name="shoes")],

diff --git a/tests/client/attachment_client_test.py b/tests/client/attachment_client_test.py
@@ -120,7 +120,7 @@ def test_create_attachment_for_dataset_for_dataset_populates_ids(
     assert finalized.id is not None
     assert finalized.datasetId is not None
     assert finalized.sampleId is None
-    assert finalized.proposalIds is None
+    assert finalized.proposalId is None
 
 
 def test_get_attachments_for_dataset(scicat_client):

diff --git a/tests/client/dataset_client_test.py b/tests/client/dataset_client_test.py
@@ -76,6 +76,7 @@ def test_get_dataset(client):
     dset = INITIAL_DATASETS["raw"]
     dblock = INITIAL_ORIG_DATABLOCKS["raw"][0]
     downloaded = client.get_dataset(dset.pid, strict_validation=True)
+    print(downloaded.source_folder)
 
     assert downloaded.source_folder == dset.sourceFolder
     assert downloaded.creation_time == dset.creationTime

diff --git a/tests/client/query_client_test.py b/tests/client/query_client_test.py
@@ -22,7 +22,7 @@
         type=DatasetType.RAW,
         principalInvestigator="investigator 1",
         creationLocation="UU",
-        proposalIds=["p0124"],
+        proposalId="p0124",
         inputDatasets=[],
         usedSoftware=["scitacean"],
     ),
@@ -39,7 +39,7 @@
         type=DatasetType.RAW,
         principalInvestigator="investigator 2",
         creationLocation="UU",
-        proposalIds=["p0124"],
+        proposalId="p0124",
         inputDatasets=[],
         usedSoftware=[],
     ),
@@ -56,7 +56,7 @@
         type=DatasetType.RAW,
         principalInvestigator="investigator 1",
         creationLocation="UU",
-        proposalIds=["p0124"],
+        proposalId="p0124",
         inputDatasets=[],
         usedSoftware=["scitacean"],
     ),