Skip to content

Commit

Permalink
feat: add mammal part preps
Browse files Browse the repository at this point in the history
The preparation view can now support preps that are from the molecular collections sub-department, or are mammal group part records from LS mammals with the DToL project tag.
  • Loading branch information
jrdh committed Feb 7, 2024
1 parent 6fb190b commit ee552fa
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 24 deletions.
32 changes: 26 additions & 6 deletions dataimporter/emu/views/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
from dataimporter.lib.model import SourceRecord
from dataimporter.lib.view import View, FilterResult, SUCCESS_RESULT

INVALID_SUBDEPARTMENT = FilterResult(False, "Invalid subdepartment")
INVALID_SUB_DEPARTMENT = FilterResult(False, "Invalid sub-department")
INVALID_PROJECT = FilterResult(False, "Invalid project")


class PreparationView(View):
Expand All @@ -26,12 +27,34 @@ class PreparationView(View):
def is_member(self, record: SourceRecord) -> FilterResult:
"""
Filters the given record, determining whether it should be included in the
preparation resource or not.
preparation resource or not. This view member filter checks for one of two kinds
of record in order for the record to be included:
- a preparation record from the Molecular Collections sub-department
- a mammal group part record from the LS Mammals sub-department with the
DToL project tag
These two types have slightly different fields but are both preps.
:param record: the record to filter
:return: a FilterResult object
"""
if record.get_first_value("ColRecordType", default="").lower() != "preparation":
record_type = record.get_first_value("ColRecordType", default="").lower()
sub_department = record.get_first_value("ColSubDepartment", default="").lower()

if record_type == "preparation":
# if the record is a prep, it must be a molecular collections prep
if sub_department != "molecular collections":
return INVALID_SUB_DEPARTMENT
elif record_type == "mammal group part":
# if the record is a mammal group part, it must be a mammals record and be
# a DToL project record
if sub_department != "ls mammals":
return INVALID_SUB_DEPARTMENT
if record.get_first_value("NhmSecProjectName") != "Darwin Tree of Life":
return INVALID_PROJECT
else:
# any other type is invalid
return INVALID_TYPE

if not is_web_published(record):
Expand All @@ -46,9 +69,6 @@ def is_member(self, record: SourceRecord) -> FilterResult:
if record.get_first_value("ColDepartment") not in DEPARTMENT_COLLECTION_CODES:
return INVALID_DEPARTMENT

if record.get_first_value("ColSubDepartment") != "Molecular Collections":
return INVALID_SUBDEPARTMENT

return SUCCESS_RESULT

def make_data(self, record: SourceRecord) -> dict:
Expand Down
102 changes: 94 additions & 8 deletions dataimporter/links.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from itertools import chain
from operator import itemgetter
from pathlib import Path
from typing import List
from typing import List, Optional

from dataimporter.lib.dbs import Index
from dataimporter.lib.model import SourceRecord
Expand Down Expand Up @@ -190,7 +189,7 @@ def clear_from_foreign(self):
self.gbif_id_map.clear()


class PreparationSpecimenLink(ManyToOneViewLink):
class PreparationSpecimenLink(ViewLink):
"""
A ViewLink representing the link between a preparation record and the specimen
voucher record it was created from.
Expand All @@ -203,6 +202,9 @@ class PreparationSpecimenLink(ManyToOneViewLink):

# the EMu field on the prep records which links to the specimen voucher record
SPECIMEN_ID_REF_FIELD = "EntPreSpecimenRef"
# the EMu field on mammal part prep records which links to the specimen voucher
# record
PARENT_SPECIMEN_ID_REF_FIELD = "RegRegistrationParentRef"
# the Portal fields which are copied from the specimen to the prep data dict
# TODO: missing CollEventDateVisitedFrom, CollEventName_tab, and kinda ColSite
MAPPED_SPECIMEN_FIELDS = [
Expand All @@ -222,13 +224,97 @@ def __init__(self, path: Path, prep_view: View, specimen_view: View):
:param prep_view: the preparation view
:param specimen_view: the specimen view
"""
super().__init__(
path,
prep_view,
specimen_view,
PreparationSpecimenLink.SPECIMEN_ID_REF_FIELD,
super().__init__(path.name, prep_view, specimen_view)
self.path = path
# a many-to-one index from prep id -> specimen id via EntPreSpecimenRef
self.prep_id_map = Index(path / "prep_id_map")
# a many-to-one index from prep id -> specimen id via RegRegistrationParentRef
self.parent_id_map = Index(path / "parent_id_map")

@staticmethod
def _is_mammal_part_prep(record: SourceRecord) -> bool:
    """
    Checks whether the given record is a mammal group part record.

    :param record: the record to check
    :return: True if the record's ColRecordType value is "mammal group part"
             (compared case-insensitively), False otherwise. A record with no
             ColRecordType value is treated as having an empty record type.
    """
    record_type = record.get_first_value("ColRecordType", default="")
    return record_type.lower() == "mammal group part"

def update_from_base(self, prep_records: List[SourceRecord]):
    """
    Works out which specimen each of the given prep records links to and stores
    the prep ID -> specimen ID pairs in the matching ID map.

    The specimen voucher ref field is always tried first. The parent ref field is
    only consulted when the voucher ref yields nothing and the record is a mammal
    group part. Records which provide neither are left out of both maps.

    :param prep_records: the changed prep records
    """
    # NOTE(review): this method only ever adds entries. Whether an entry left in
    # one map goes stale when a prep's link moves to the other field depends on
    # Index behaviour that is not visible here - confirm.
    voucher_links = {}
    parent_links = {}

    for prep in prep_records:
        voucher_id = prep.get_first_value(self.SPECIMEN_ID_REF_FIELD)
        if voucher_id:
            # the voucher ref wins, no need to look at the parent ref at all
            voucher_links[prep.id] = voucher_id
        elif self._is_mammal_part_prep(prep):
            # only mammal parts are allowed to fall back to the parent ref
            parent_id = prep.get_first_value(self.PARENT_SPECIMEN_ID_REF_FIELD)
            if parent_id:
                parent_links[prep.id] = parent_id

    # only touch an index when there is actually something to write to it
    if voucher_links:
        self.prep_id_map.put_many(voucher_links.items())
    if parent_links:
        self.parent_id_map.put_many(parent_links.items())

def update_from_foreign(self, specimen_records: List[SourceRecord]):
    """
    Pushes changes in the given specimen records through to the prep records
    which are linked to them, by queueing those prep records on the base view.

    :param specimen_records: the updated specimen records
    """
    specimen_ids = [specimen.id for specimen in specimen_records]

    # reverse lookup: each specimen ID can be the target of many prep IDs, so
    # gather them all into a set. The voucher ref map is consulted for every
    # specimen first, then the parent ref map.
    prep_ids = set()
    for id_map in (self.prep_id_map, self.parent_id_map):
        for specimen_id in specimen_ids:
            prep_ids.update(id_map.get_keys(specimen_id))

    if not prep_ids:
        return

    prep_records = list(self.base_view.db.get_records(prep_ids))
    if prep_records:
        # there are prep records for these IDs, queue changes to them on the base
        # view
        self.base_view.queue(prep_records)

def get_foreign_record_data(self, prep_record: SourceRecord) -> Optional[dict]:
    """
    Looks up the specimen linked to the given prep record and returns the result
    of calling get_and_transform on the foreign view with that specimen's ID.

    The specimen voucher ref field is tried first. The parent ref field is only
    used as a fallback when the voucher ref yields nothing and the record is a
    mammal group part.

    :param prep_record: the prep record
    :return: whatever the foreign view's get_and_transform returns for the linked
             specimen ID, or None if the prep record has no usable link
    """
    voucher_id = prep_record.get_first_value(self.SPECIMEN_ID_REF_FIELD)
    if voucher_id:
        return self.foreign_view.get_and_transform(voucher_id)

    # without a voucher ref, only mammal parts have another way to reach a specimen
    if not self._is_mammal_part_prep(prep_record):
        return None

    parent_id = prep_record.get_first_value(self.PARENT_SPECIMEN_ID_REF_FIELD)
    if not parent_id:
        return None
    return self.foreign_view.get_and_transform(parent_id)

def clear_from_base(self):
    """
    Clears out both ID maps: the one built from the specimen voucher ref field
    and the one built from the mammal part parent ref field.
    """
    for id_map in (self.prep_id_map, self.parent_id_map):
        id_map.clear()

def transform(self, prep_record: SourceRecord, data: dict):
"""
Transform the given prep record's data with data from the linked voucher
Expand Down
72 changes: 64 additions & 8 deletions tests/emu/views/test_preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,26 @@

import pytest

from dataimporter.lib.dbs import DataDB
from dataimporter.emu.views.preparation import (
PreparationView,
INVALID_SUB_DEPARTMENT,
INVALID_PROJECT,
)
from dataimporter.emu.views.utils import (
NO_PUBLISH,
INVALID_TYPE,
INVALID_GUID,
INVALID_STATUS,
INVALID_DEPARTMENT,
)
from dataimporter.lib.dbs import DataDB
from dataimporter.lib.model import SourceRecord
from dataimporter.lib.view import FilterResult, SUCCESS_RESULT
from dataimporter.emu.views.preparation import PreparationView, INVALID_SUBDEPARTMENT
from tests.helpers.samples.preparation import (
SAMPLE_PREPARATION_DATA,
SAMPLE_PREPARATION_ID,
SAMPLE_MAMMAL_PREPARATION_ID,
SAMPLE_MAMMAL_PREPARATION_DATA,
)


Expand All @@ -29,33 +35,63 @@ def prep_view(tmp_path: Path) -> PreparationView:
yield view


is_member_scenarios: List[Tuple[dict, FilterResult]] = [
mol_prep_is_member_scenarios: List[Tuple[dict, FilterResult]] = [
({"ColRecordType": "Specimen"}, INVALID_TYPE),
# this is a check to make sure a mammal part in molecular collections doesn't come
# through
({"ColRecordType": "Mammal Group Part"}, INVALID_SUB_DEPARTMENT),
({"AdmPublishWebNoPasswordFlag": "n"}, NO_PUBLISH),
({"AdmGUIDPreferredValue": "not a valid guid!"}, INVALID_GUID),
({"SecRecordStatus": "INVALID"}, INVALID_STATUS),
({"ColDepartment": "DDI"}, INVALID_DEPARTMENT),
({"ColSubDepartment": "Informatics"}, INVALID_SUBDEPARTMENT),
({"ColSubDepartment": "Informatics"}, INVALID_SUB_DEPARTMENT),
({"ColSubDepartment": "LS Mammals"}, INVALID_SUB_DEPARTMENT),
({}, SUCCESS_RESULT),
]


@pytest.mark.parametrize("overrides, result", is_member_scenarios)
def test_is_member(overrides: dict, result: FilterResult, prep_view: PreparationView):
@pytest.mark.parametrize("overrides, result", mol_prep_is_member_scenarios)
def test_is_member_mol_prep(
overrides: dict, result: FilterResult, prep_view: PreparationView
):
data = {**SAMPLE_PREPARATION_DATA, **overrides}
record = SourceRecord(SAMPLE_PREPARATION_ID, data, "test")
assert prep_view.is_member(record) == result


# Each scenario pairs a dict of field overrides with the FilterResult expected from
# is_member once those overrides have been applied on top of the sample mammal part
# prep data. The empty override checks the unmodified sample record.
mammal_part_prep_is_member_scenarios: List[Tuple[dict, FilterResult]] = [
({"ColRecordType": "Specimen"}, INVALID_TYPE),
({"AdmPublishWebNoPasswordFlag": "n"}, NO_PUBLISH),
({"AdmGUIDPreferredValue": "not a valid guid!"}, INVALID_GUID),
({"SecRecordStatus": "INVALID"}, INVALID_STATUS),
({"ColDepartment": "DDI"}, INVALID_DEPARTMENT),
({"ColSubDepartment": "Informatics"}, INVALID_SUB_DEPARTMENT),
({"ColSubDepartment": "Molecular Collections"}, INVALID_SUB_DEPARTMENT),
({"NhmSecProjectName": "Life of Darwin Tree"}, INVALID_PROJECT),
# this is a check to make sure a prep in LS Mammals doesn't come through
({"ColRecordType": "Preparation"}, INVALID_SUB_DEPARTMENT),
({}, SUCCESS_RESULT),
]


@pytest.mark.parametrize("overrides, result", mammal_part_prep_is_member_scenarios)
def test_is_member_mammal_part_prep(
    overrides: dict, result: FilterResult, prep_view: PreparationView
):
    """
    Applies each scenario's overrides to the sample mammal part prep data and
    checks that is_member returns the expected filter result.
    """
    # start from a copy of the sample data so the shared sample is never mutated
    record_data = dict(SAMPLE_MAMMAL_PREPARATION_DATA)
    record_data.update(overrides)
    mammal_part = SourceRecord(SAMPLE_MAMMAL_PREPARATION_ID, record_data, "test")

    outcome = prep_view.is_member(mammal_part)
    assert outcome == result


def test_transform_deleted(prep_view: PreparationView):
record = SourceRecord(SAMPLE_PREPARATION_ID, {}, "test")
record = SourceRecord("an_ID_it_does_not_matter", {}, "test")
assert record.is_deleted

data = prep_view.transform(record)
assert data == {}


def test_make_data(prep_view: PreparationView):
def test_make_data_mol_prep(prep_view: PreparationView):
record = SourceRecord(SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA, "test")

data = prep_view.make_data(record)
Expand All @@ -71,3 +107,23 @@ def test_make_data(prep_view: PreparationView):
"preparationContents": "**OTHER_SOMATIC_ANIMAL_TISSUE**",
"preparationDate": "2022-05-09",
}


def test_make_data_mammal_part(prep_view: PreparationView):
    """
    Checks the data dict make_data produces for the sample mammal part prep
    record.
    """
    mammal_part = SourceRecord(
        SAMPLE_MAMMAL_PREPARATION_ID, SAMPLE_MAMMAL_PREPARATION_DATA, "test"
    )
    expected = {
        "_id": mammal_part.id,
        "created": "2023-05-02T14:55:51+00:00",
        "modified": "2023-05-02T14:55:51+00:00",
        "project": "Darwin Tree of Life",
        "preparationNumber": "FF06063966",
        "preparationType": None,
        "mediumType": None,
        "preparationProcess": "Flash Freezing: Dry Ice",
        "preparationContents": "MUSCLE",
        "preparationDate": None,
    }

    assert prep_view.make_data(mammal_part) == expected
78 changes: 76 additions & 2 deletions tests/helpers/samples/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# this is taken from ecatalogue.export.20231008.gz but with the EntPreSpecimenRef field
# replaced with a single reference to the SAMPLE_SPECIMEN_ID
raw_data = f"""
raw_prep_data = f"""
rownum=3645
irn:1=9968955
SummaryData:1=no Collection Kind for preparation (irn 9968955)
Expand Down Expand Up @@ -70,4 +70,78 @@
NhmSecProjectName:1=Darwin Tree of Life
"""

SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA = read_emu_extract(raw_data)
SAMPLE_PREPARATION_ID, SAMPLE_PREPARATION_DATA = read_emu_extract(raw_prep_data)

raw_mammal_part_data = f"""
rownum=7228
irn:1=10337727
SummaryData:1=NHMUK ZD 2022.43, Neovison vison Schreber, 1777, tissue sample, Tissue Sample, MUSCLE
ExtendedData:1=10337727
ExtendedData:2=
ExtendedData:3=NHMUK ZD 2022.43, Neovison vison Schreber, 1777, tissue sample, Tissue Sample, MUSCLE
ColDepartment:1=Zoology
ColSubDepartment:1=LS Mammals
ColRecordType:1=Mammal Group Part
GeneralCatalogueNumber:1=irn: 10337727
EntIdeQualifiedNameAutomatic:1=Yes
EntPreContents:1=MUSCLE
EntPrePreparationMethod:1=Flash Freezing: Dry Ice
EntPrePreparatorRef:1=408801
EntPrePreparatorRefLocal:1=408801
EntPrePreparatorSumDataLocal:1=Inez Januszczak; Natural History Museum; Life Sciences; Januszczak
EntPreNumber:1=FF06063966
PalOthNumber:1=
PalOthNumber:2=FF06063966
PalOthTypeofNumber:1=Tube barcode
AcqHistoric:1=No
PrtType:1=tissue sample
PrtPreparedByRef:1=408801
PrtCompleteness:1=MUSCLE
RegRegistrationParentRef:1={SAMPLE_SPECIMEN_ID}
RegRegistrationParentRefLocal:1={SAMPLE_SPECIMEN_ID}
RegRegistrationNumberLocal:1=NHMUK ZD 2022.43
EntIdeFiledAsQualifiedNameLocal:1=Neovison vison Schreber, 1777
RegRegParentHigherTaxonLocal:1=Animalia; Chordata; Vertebrata; Mammalia; Carnivora; Caniformia; Mustelidae; Mustelinae
RegRegParentScientificNameLocal:1=Neovison vison Schreber, 1777
CatKindOfObject:1=Tissue Sample
LocIndependentlyMoveable:1=Yes
AcqLegTransferOfTitle:1=No
AcqLegPurAgree:1=No
AcqLegConfirmationOfGift:1=No
AcqLegDueDilligence:1=No
AcqLegCollectionImpact:1=No
NteText0:1=M
NteText1:1=Purpose of specimen: DNA barcoding only
NteType:1=Size
AdmPublishWebNoPasswordFlag:1=Y
AdmPublishWebNoPassword:1=Yes
AdmPublishWebPasswordFlag:1=Y
AdmPublishWebPassword:1=Yes
AdmGUIDPreferredType:1=UUID4
AdmGUIDPreferredValue:1=541cb421-2a3f-4699-ad3b-8030f36afffa
AdmGUIDIsPreferred:1=Yes
AdmGUIDType:1=UUID4
AdmGUIDValue:1=541cb421-2a3f-4699-ad3b-8030f36afffa
AdmInsertedBy:1=Matthew Besley
AdmDateInserted=2023-05-02
AdmImportIdentifier:1=02052023_Mammals_Parts_Fix
AdmTimeInserted=14:55:51.000
AdmSystemIdentifier:1=mattb1-230502-1455
AdmModifiedBy:1=Matthew Besley
AdmDateModified=2023-05-02
AdmTimeModified=14:55:51.000
AdmDateRecordModified=2023-05-02
AdmTimeRecordModified=14:55:51.000
SecRecordStatus:1=Active
SecCanDisplay:1=Group Default
SecCanEdit:1=Group Default
SecCanDelete:1=Group Default
SecDepartment:1=Entomology
SecLookupRoot:1=Entomology
NhmSecOpenDataPolicyException:1=none
NhmSecProjectName:1=Darwin Tree of Life
"""

SAMPLE_MAMMAL_PREPARATION_ID, SAMPLE_MAMMAL_PREPARATION_DATA = read_emu_extract(
raw_mammal_part_data
)
Loading

0 comments on commit ee552fa

Please sign in to comment.