diff --git a/src/scitacean/_dataset_fields.py b/src/scitacean/_dataset_fields.py index 40fdf74d..6b994aa4 100644 --- a/src/scitacean/_dataset_fields.py +++ b/src/scitacean/_dataset_fields.py @@ -99,7 +99,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="access_groups", - description="List of groups which have access to this item.", + description="Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.", read_only=False, required=False, scicat_name="accessGroups", @@ -199,7 +199,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="data_quality_metrics", - description="Data Quality Metrics is a number given by the user to rate the dataset.", + description="Data Quality Metrics given by the user to rate the dataset.", read_only=False, required=False, scicat_name="dataQualityMetrics", @@ -239,7 +239,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="instrument_group", - description="Group of the instrument which this item was acquired on.", + description="Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.", read_only=False, required=False, scicat_name="instrumentGroup", @@ -248,12 +248,12 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_raw=True, ), Field( - name="instrument_ids", + name="instrument_id", description="ID of the instrument where the data was created.", read_only=False, required=False, - scicat_name="instrumentIds", - type=list[str], + scicat_name="instrumentId", + type=str, used_by_derived=False, used_by_raw=True, ), @@ -379,7 +379,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="owner_group", - description="Name of the group owning this item.", + description="Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151", read_only=False, required=True, scicat_name="ownerGroup", @@ -389,7 +389,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: ), Field( name="pid", - description="Persistent identifier of the dataset.", + description="Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/", read_only=True, required=False, scicat_name="pid", @@ -408,10 +408,20 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_raw=True, ), Field( - name="proposal_ids", + name="proposal_id", description="The ID of the proposal to which the dataset belongs.", read_only=False, required=False, + scicat_name="proposalId", + type=str, + used_by_derived=True, + used_by_raw=True, + ), + Field( + name="proposal_ids", + description="The ID of the proposal to which the dataset belongs to and it has been acquired under.", + read_only=True, + required=False, scicat_name="proposalIds", type=list[str], used_by_derived=True, @@ -428,22 +438,12 @@ def used_by(self, dataset_type: DatasetType) -> bool: used_by_raw=True, ), Field( - name="run_number", - description="Run number assigned by the system to the data acquisition for the current dataset.", - read_only=False, - required=False, - scicat_name="runNumber", - type=str, - used_by_derived=True, - used_by_raw=True, - ), - Field( - name="sample_ids", + name="sample_id", description="ID of the sample used when collecting the data.", read_only=False, required=False, - scicat_name="sampleIds", - type=list[str], + scicat_name="sampleId", + type=str, used_by_derived=False, used_by_raw=True, ), @@ -565,6 +565,7 @@ def used_by(self, dataset_type: DatasetType) -> bool: "_end_time", "_input_datasets", "_instrument_group", + "_instrument_id", "_instrument_ids", "_investigator", "_is_published", @@ -580,9 +581,10 @@ def used_by(self, dataset_type: DatasetType) -> bool: "_owner_group", "_pid", "_principal_investigator", + "_proposal_id", "_proposal_ids", "_relationships", - "_run_number", + "_sample_id", "_sample_ids", "_shared_with", "_source_folder", @@ -615,7 +617,7 @@ def __init__( end_time: datetime | None = None, input_datasets: list[PID] | None = None, instrument_group: str | None = None, - instrument_ids: list[str] | None = None, + instrument_id: str | None = None, investigator: str | None = None, is_published: bool | None = None, job_log_data: str | None = None, @@ -628,10 +630,9 @@ def __init__( owner_email: str | None = None, owner_group: str | None = None, principal_investigator: str | None = None, - proposal_ids: list[str] | None = None, + proposal_id: str | None = None, relationships: list[Relationship] | None = None, - run_number: str | None = None, - sample_ids: list[str] | None = None, + sample_id: str | None = None, shared_with: list[str] | None = None, source_folder: RemotePath | str | None = None, source_folder_host: str | None = None, @@ -655,7 +656,7 @@ def __init__( self._end_time = end_time self._input_datasets = input_datasets self._instrument_group = instrument_group - self._instrument_ids = instrument_ids + self._instrument_id = instrument_id self._investigator = investigator self._is_published = is_published self._job_log_data = job_log_data @@ -668,10 +669,9 @@ def __init__( self._owner_email = owner_email self._owner_group = owner_group self._principal_investigator = principal_investigator - self._proposal_ids = proposal_ids + self._proposal_id = proposal_id self._relationships = relationships - self._run_number = run_number - self._sample_ids = sample_ids + self._sample_id = sample_id self._shared_with = shared_with self._source_folder = _parse_remote_path(source_folder) self._source_folder_host = source_folder_host @@ -698,12 +698,12 @@ def __init__( @property def access_groups(self) -> list[str] | None: - """List of groups which have access to this item.""" + """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.""" return self._access_groups @access_groups.setter def access_groups(self, access_groups: list[str] | None) -> None: - """List of groups which have access to this item.""" + """Optional additional groups which have read access to the data. Users which are members in one of the groups listed here are allowed to access this data. The special group 'public' makes data available to all users.""" self._access_groups = access_groups @property @@ -783,12 +783,12 @@ def data_format(self, data_format: str | None) -> None: @property def data_quality_metrics(self) -> int | None: - """Data Quality Metrics is a number given by the user to rate the dataset.""" + """Data Quality Metrics given by the user to rate the dataset.""" return self._data_quality_metrics @data_quality_metrics.setter def data_quality_metrics(self, data_quality_metrics: int | None) -> None: - """Data Quality Metrics is a number given by the user to rate the dataset.""" + """Data Quality Metrics given by the user to rate the dataset.""" self._data_quality_metrics = data_quality_metrics @property @@ -823,23 +823,23 @@ def input_datasets(self, input_datasets: list[PID] | None) -> None: @property def instrument_group(self) -> str | None: - """Group of the instrument which this item was acquired on.""" + """Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.""" return self._instrument_group @instrument_group.setter def instrument_group(self, instrument_group: str | None) -> None: - """Group of the instrument which this item was acquired on.""" + """Optional additional groups which have read and write access to the data. Users which are members in one of the groups listed here are allowed to access this data.""" self._instrument_group = instrument_group @property - def instrument_ids(self) -> list[str] | None: + def instrument_id(self) -> str | None: """ID of the instrument where the data was created.""" - return self._instrument_ids + return self._instrument_id - @instrument_ids.setter - def instrument_ids(self, instrument_ids: list[str] | None) -> None: + @instrument_id.setter + def instrument_id(self, instrument_id: str | None) -> None: """ID of the instrument where the data was created.""" - self._instrument_ids = instrument_ids + self._instrument_id = instrument_id @property def instrument_ids(self) -> list[str] | None: @@ -953,17 +953,17 @@ def owner_email(self, owner_email: str | None) -> None: @property def owner_group(self) -> str | None: - """Name of the group owning this item.""" + """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151""" return self._owner_group @owner_group.setter def owner_group(self, owner_group: str | None) -> None: - """Name of the group owning this item.""" + """Defines the group which owns the data, and therefore has unrestricted access to this data. Usually a pgroup like p12151""" self._owner_group = owner_group @property def pid(self) -> PID | None: - """Persistent identifier of the dataset.""" + """Persistent Identifier for datasets derived from UUIDv4 and prepended automatically by site specific PID prefix like 20.500.12345/""" return self._pid @property @@ -977,14 +977,14 @@ def principal_investigator(self, principal_investigator: str | None) -> None: self._principal_investigator = principal_investigator @property - def proposal_ids(self) -> list[str] | None: + def proposal_id(self) -> str | None: """The ID of the proposal to which the dataset belongs.""" - return self._proposal_ids + return self._proposal_id - @proposal_ids.setter - def proposal_ids(self, proposal_ids: list[str] | None) -> None: + @proposal_id.setter + def proposal_id(self, proposal_id: str | None) -> None: """The ID of the proposal to which the dataset belongs.""" - self._proposal_ids = proposal_ids + self._proposal_id = proposal_id @property def proposal_ids(self) -> list[str] | None: @@ -1002,24 +1002,14 @@ def relationships(self, relationships: list[Relationship] | None) -> None: self._relationships = relationships @property - def run_number(self) -> str | None: - """Run number assigned by the system to the data acquisition for the current dataset.""" - return self._run_number - - @run_number.setter - def run_number(self, run_number: str | None) -> None: - """Run number assigned by the system to the data acquisition for the current dataset.""" - self._run_number = run_number - - @property - def sample_ids(self) -> list[str] | None: + def sample_id(self) -> str | None: """ID of the sample used when collecting the data.""" - return self._sample_ids + return self._sample_id - @sample_ids.setter - def sample_ids(self, sample_ids: str | None) -> None: + @sample_id.setter + def sample_id(self, sample_id: str | None) -> None: """ID of the sample used when collecting the data.""" - self._sample_ids = sample_ids + self._sample_id = sample_id @property def sample_ids(self) -> list[str] | None: @@ -1130,7 +1120,9 @@ def _prepare_fields_from_download( for field in DatasetBase._FIELD_SPEC: if field.read_only: read_only["_" + field.name] = getattr(download_model, field.scicat_name) - else: + elif hasattr( + download_model, field.scicat_name + ): # TODO remove condition in API v4 init_args[field.name] = getattr(download_model, field.scicat_name) init_args["meta"] = download_model.scientificMetadata diff --git a/src/scitacean/model.py b/src/scitacean/model.py index 152b2862..99f66384 100644 --- a/src/scitacean/model.py +++ b/src/scitacean/model.py @@ -219,7 +219,6 @@ class UploadRawDataset(BaseModel): creationLocation: str creationTime: datetime inputDatasets: list[PID] - investigator: str | None = None numberOfFilesArchived: NonNegativeInt owner: str ownerGroup: str @@ -227,6 +226,7 @@ class UploadRawDataset(BaseModel): sourceFolder: RemotePath type: DatasetType usedSoftware: list[str] + investigator: str | None = None accessGroups: list[str] | None = None classification: str | None = None comment: str | None = None diff --git a/src/scitacean/testing/backend/seed.py b/src/scitacean/testing/backend/seed.py index eed7ceed..ff8c2bf0 100644 --- a/src/scitacean/testing/backend/seed.py +++ b/src/scitacean/testing/backend/seed.py @@ -48,6 +48,7 @@ ownerEmail="PLACE@HOLD.ER", sourceFolder=RemotePath("/hex/data/123"), type=DatasetType.RAW, + investigator="Ponder Stibbons", principalInvestigator="Ponder Stibbons", creationLocation=SITE, techniques=[UploadTechnique(pid="DM666", name="dark_magic")], @@ -95,6 +96,7 @@ ownerEmail="PLACE@HOLD.ER", sourceFolder=RemotePath("/hex/secret/stuff"), type=DatasetType.RAW, + investigator="Mustrum Ridcully", principalInvestigator="Mustrum Ridcully", creationLocation=SITE, techniques=[UploadTechnique(pid="S", name="shoes")], diff --git a/tests/client/attachment_client_test.py b/tests/client/attachment_client_test.py index 99f575e7..c627c0d1 100644 --- a/tests/client/attachment_client_test.py +++ b/tests/client/attachment_client_test.py @@ -120,7 +120,7 @@ def test_create_attachment_for_dataset_for_dataset_populates_ids( assert finalized.id is not None assert finalized.datasetId is not None assert finalized.sampleId is None - assert finalized.proposalIds is None + assert finalized.proposalId is None def test_get_attachments_for_dataset(scicat_client): diff --git a/tests/client/dataset_client_test.py b/tests/client/dataset_client_test.py index d8fb4287..d39eea22 100644 --- a/tests/client/dataset_client_test.py +++ b/tests/client/dataset_client_test.py @@ -76,6 +76,7 @@ def test_get_dataset(client): dset = INITIAL_DATASETS["raw"] dblock = INITIAL_ORIG_DATABLOCKS["raw"][0] downloaded = client.get_dataset(dset.pid, strict_validation=True) + print(downloaded.source_folder) assert downloaded.source_folder == dset.sourceFolder assert downloaded.creation_time == dset.creationTime diff --git a/tests/client/query_client_test.py b/tests/client/query_client_test.py index 1a478351..4081ea42 100644 --- a/tests/client/query_client_test.py +++ b/tests/client/query_client_test.py @@ -22,7 +22,7 @@ type=DatasetType.RAW, principalInvestigator="investigator 1", creationLocation="UU", - proposalIds=["p0124"], + proposalId="p0124", inputDatasets=[], usedSoftware=["scitacean"], ), @@ -39,7 +39,7 @@ type=DatasetType.RAW, principalInvestigator="investigator 2", creationLocation="UU", - proposalIds=["p0124"], + proposalId="p0124", inputDatasets=[], usedSoftware=[], ), @@ -56,7 +56,7 @@ type=DatasetType.RAW, principalInvestigator="investigator 1", creationLocation="UU", - proposalIds=["p0124"], + proposalId="p0124", inputDatasets=[], usedSoftware=["scitacean"], ), diff --git a/tests/dataset_test.py b/tests/dataset_test.py index 2a37f67c..40187f21 100644 --- a/tests/dataset_test.py +++ b/tests/dataset_test.py @@ -24,7 +24,6 @@ def raw_download_model(): creationLocation="UnseenUniversity", creationTime=parse_datetime("1995-08-06T14:14:14Z"), inputDatasets=None, - investigator=None, numberOfFilesArchived=None, owner="pstibbons", ownerGroup="faculty", @@ -93,11 +92,10 @@ def derived_download_model(): creationLocation=None, creationTime=parse_datetime("1995-08-06T14:14:14Z"), inputDatasets=[PID.parse("123.cc/948.f7.2a")], - investigator="Ponder Stibbons", numberOfFilesArchived=None, owner="pstibbons", ownerGroup="faculty", - principalInvestigator=None, + principalInvestigator="Ponder Stibbons", sourceFolder=RemotePath("/uu/hex"), type=DatasetType.DERIVED, usedSoftware=["scitacean"], @@ -173,6 +171,8 @@ def get_model_field(name): dset = Dataset.from_download_models(dataset_download_model, []) for field in dset.fields(): + if field.name in ("instrument_id", "sample_id", "proposal_id", "investigator"): + continue # TODO remove when API v4 is released if field.used_by(dataset_download_model.type): assert getattr(dset, field.name) == get_model_field(field.scicat_name) @@ -180,6 +180,8 @@ def get_model_field(name): def test_from_download_models_does_not_initialize_wrong_fields(dataset_download_model): dset = Dataset.from_download_models(dataset_download_model, []) for field in dset.fields(): + if field.name == "principal_investigator": + continue # TODO remove when API v4 is released if not field.used_by(dataset_download_model.type): assert getattr(dset, field.name) is None @@ -318,6 +320,7 @@ def test_dataset_models_roundtrip(initial): orig_datablock_models=dblock_models, attachment_models=attachment_models, ) + rebuilt.investigator = initial.investigator # TODO remove in API v4 assert initial == rebuilt diff --git a/tests/html_repr/html_repr_test.py b/tests/html_repr/html_repr_test.py index 433baa61..da172ffc 100644 --- a/tests/html_repr/html_repr_test.py +++ b/tests/html_repr/html_repr_test.py @@ -13,7 +13,7 @@ def test_dataset_html_repr(): name="My dataset", contact_email="devsci.cat", owner="The People", - instrument_ids=["the-peoples-neutron-gun"], + instrument_id="the-peoples-neutron-gun", used_software=["scitacean"], source_folder=RemotePath("/remote/dir/"), meta={ diff --git a/tests/model_test.py b/tests/model_test.py index ee33206c..24386e2c 100644 --- a/tests/model_test.py +++ b/tests/model_test.py @@ -226,7 +226,6 @@ def test_default_masked_fields_are_dropped(): def test_custom_masked_fields_are_dropped(): mod = DownloadDataset( # type: ignore[call-arg] - attachments=[{"id": "abc"}], id="abc", _id="def", _v="123", diff --git a/tools/model-generation/templates/dataset_fields.py.jinja b/tools/model-generation/templates/dataset_fields.py.jinja index 2e8d5a3b..a5eb10d5 100644 --- a/tools/model-generation/templates/dataset_fields.py.jinja +++ b/tools/model-generation/templates/dataset_fields.py.jinja @@ -202,7 +202,9 @@ class DatasetBase: for field in DatasetBase._FIELD_SPEC: if field.read_only: read_only["_"+field.name] = getattr(download_model, field.scicat_name) - else: + elif hasattr( + download_model, field.scicat_name + ): # TODO remove condition in API v4 init_args[field.name] = getattr(download_model, field.scicat_name) init_args["meta"] = download_model.scientificMetadata