Skip to content

Commit

Permalink
chore: switch a number of fields to get_first instead of get_all
Browse files Browse the repository at this point in the history
I checked all of the fields against a full ecatalogue dump from 15/07/2021 and many of them only ever held a single value, so those fields have been switched from get_all to get_first.
  • Loading branch information
jrdh committed Oct 31, 2023
1 parent cabd998 commit 433e9e1
Showing 1 changed file with 109 additions and 118 deletions.
227 changes: 109 additions & 118 deletions dataimporter/emu/views/specimen.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,6 @@ def make_data(self, record: SourceRecord) -> dict:
:return: a dict containing the data for this record that should be displayed on
the Data Portal
"""
# TODO: I had a note for checking minimumDepthInMeters and maximumDepthInMeters
# for some reason?

# cache these for perf
get_all = record.get_all_values
get_first = record.get_first_value
Expand All @@ -101,149 +98,143 @@ def make_data(self, record: SourceRecord) -> dict:
"coordinateUncertaintyInMeters": get_first(
"DarCoordinateUncertaintyInMeter"
),
"verbatimLongitude": get_all("sumPreferredCentroidLongitude"),
"verbatimLatitude": get_all("sumPreferredCentroidLatitude"),
"locality": get_first(
"PalNearestNamedPlaceLocal",
"sumPreciseLocation",
"MinNhmVerbatimLocalityLocal",
),
"verbatimLongitude": get_first("sumPreferredCentroidLongitude"),
"verbatimLatitude": get_first("sumPreferredCentroidLatitude"),
"locality": get_first("sumPreciseLocation"),
"minimumDepthInMeters": get_first(
"CollEventFromMetres", "DarMinimumDepthInMeters"
),
"maximumDepthInMeters": get_first(
"CollEventToMetres", "DarMaximumDepthInMeters"
),
"country": get_all("DarCountry"),
"waterBody": get_all("DarWaterBody"),
"stateProvince": get_all("DarStateProvince"),
"continent": get_all("DarContinent"),
"island": get_all("DarIsland"),
"islandGroup": get_all("DarIslandGroup"),
"higherGeography": get_all("DarHigherGeography"),
"geodeticDatum": get_all("DarGeodeticDatum"),
"georeferenceProtocol": get_all("DarGeorefMethod"),
"minimumElevationInMeters": get_all("DarMinimumElevationInMeters"),
"maximumElevationInMeters": get_all("DarMaximumElevationInMeters"),
"country": get_first("DarCountry"),
"waterBody": get_first("DarWaterBody"),
"stateProvince": get_first("DarStateProvince"),
"continent": get_first("DarContinent"),
"island": get_first("DarIsland"),
"islandGroup": get_first("DarIslandGroup"),
"higherGeography": get_first("DarHigherGeography"),
"geodeticDatum": get_first("DarGeodeticDatum"),
"georeferenceProtocol": get_first("DarGeorefMethod"),
"minimumElevationInMeters": get_first("DarMinimumElevationInMeters"),
"maximumElevationInMeters": get_first("DarMaximumElevationInMeters"),
# occurrence
"lifeStage": get_first("DarLifeStage", "CardParasiteStage"),
"catalogNumber": get_first("DarCatalogNumber", "RegRegistrationNumber"),
"recordNumber": get_all("DarCollectorNumber"),
"recordNumber": get_first("DarCollectorNumber"),
"occurrenceID": get_first("AdmGUIDPreferredValue"),
"recordedBy": get_first("DarCollector", "CollEventNameSummaryData"),
"individualCount": get_all("DarIndividualCount"),
"sex": get_all("DarSex"),
"preparations": get_all("DarPreparations"),
"individualCount": get_first("DarIndividualCount"),
"sex": get_first("DarSex"),
"preparations": get_first("DarPreparations"),
# identification
"typeStatus": get_first("DarTypeStatus", "sumTypeStatus"),
"identifiedBy": get_all("DarIdentifiedBy"),
"dateIdentified": get_all("EntIdeDateIdentified"),
"identificationQualifier": get_all("DarIdentificationQualifier"),
"identifiedBy": get_first("DarIdentifiedBy"),
"dateIdentified": get_first("EntIdeDateIdentified"),
"identificationQualifier": get_first("DarIdentificationQualifier"),
# taxon
"scientificName": get_all("DarScientificName"),
"scientificNameAuthorship": get_all("IdeFiledAsAuthors"),
"kingdom": get_all("DarKingdom"),
"phylum": get_all("DarPhylum"),
"class": get_all("DarClass"),
"order": get_all("DarOrder"),
"family": get_all("DarFamily"),
"genus": get_all("DarGenus"),
"subgenus": get_all("DarSubgenus"),
"specificEpithet": get_all("DarSpecies"),
"infraspecificEpithet": get_all("DarSubspecies"),
"higherClassification": get_all("DarHigherTaxon"),
"taxonRank": get_all("DarInfraspecificRank"),
"scientificName": get_first("DarScientificName"),
"scientificNameAuthorship": get_first("IdeFiledAsAuthors"),
"kingdom": get_first("DarKingdom"),
"phylum": get_first("DarPhylum"),
"class": get_first("DarClass"),
"order": get_first("DarOrder"),
"family": get_first("DarFamily"),
"genus": get_first("DarGenus"),
"subgenus": get_first("DarSubgenus"),
"specificEpithet": get_first("DarSpecies"),
"infraspecificEpithet": get_first("DarSubspecies"),
"higherClassification": get_first("DarHigherTaxon"),
"taxonRank": get_first("DarInfraspecificRank"),
# event
"samplingProtocol": get_all("CollEventCollectionMethod"),
"fieldNumber": get_all("DarFieldNumber"),
"samplingProtocol": get_first("CollEventCollectionMethod"),
"fieldNumber": get_first("DarFieldNumber"),
"habitat": get_all("ColHabitatVerbatim"),
"eventTime": get_all("DarTimeOfDay"),
"day": get_all("DarDayCollected"),
"month": get_all("DarMonthCollected"),
"year": get_all("DarYearCollected"),
"eventTime": get_first("DarTimeOfDay"),
"day": get_first("DarDayCollected"),
"month": get_first("DarMonthCollected"),
"year": get_first("DarYearCollected"),
# geological context
"earliestEonOrLowestEonothem": get_all("DarEarliestEon"),
"latestEonOrHighestEonothem": get_all("DarLatestEon"),
"earliestEraOrLowestErathem": get_all("DarEarliestEra"),
"latestEraOrHighestErathem": get_all("DarLatestEra"),
"earliestPeriodOrLowestSystem": get_all("DarEarliestPeriod"),
"latestPeriodOrHighestSystem": get_all("DarLatestPeriod"),
"earliestEpochOrLowestSeries": get_all("DarEarliestEpoch"),
"latestEpochOrHighestSeries": get_all("DarLatestEpoch"),
"earliestAgeOrLowestStage": get_all("DarEarliestAge"),
"latestAgeOrHighestStage": get_all("DarLatestAge"),
"lowestBiostratigraphicZone": get_all("DarLowestBiostrat"),
"highestBiostratigraphicZone": get_all("DarHighestBiostrat"),
"group": get_all("DarGroup"),
"formation": get_all("DarFormation"),
"member": get_all("DarMember"),
"bed": get_all("DarBed"),
"earliestEonOrLowestEonothem": get_first("DarEarliestEon"),
"latestEonOrHighestEonothem": get_first("DarLatestEon"),
"earliestEraOrLowestErathem": get_first("DarEarliestEra"),
"latestEraOrHighestErathem": get_first("DarLatestEra"),
"earliestPeriodOrLowestSystem": get_first("DarEarliestPeriod"),
"latestPeriodOrHighestSystem": get_first("DarLatestPeriod"),
"earliestEpochOrLowestSeries": get_first("DarEarliestEpoch"),
"latestEpochOrHighestSeries": get_first("DarLatestEpoch"),
"earliestAgeOrLowestStage": get_first("DarEarliestAge"),
"latestAgeOrHighestStage": get_first("DarLatestAge"),
"lowestBiostratigraphicZone": get_first("DarLowestBiostrat"),
"highestBiostratigraphicZone": get_first("DarHighestBiostrat"),
"group": get_first("DarGroup"),
"formation": get_first("DarFormation"),
"member": get_first("DarMember"),
"bed": get_first("DarBed"),
# custom
"created": emu_date(
get_first("AdmDateInserted"), get_first("AdmTimeInserted")
),
"barcode": get_first("EntCatBarcode", "CardBarcode"),
"preservative": get_first("CatPreservative", "EntCatPreservation"),
"expedition": get_first(
"EntLocExpeditionNameLocal", "CollEventExpeditionName"
),
"vessel": get_all("CollEventVesselName"),
"subDepartment": get_all("ColSubDepartment"),
"partType": get_all("PrtType"),
"registrationCode": get_all("RegCode"),
"kindOfObject": get_all("CatKindOfObject"),
"kindOfCollection": get_all("CatKindOfCollection"),
"collectionKind": get_all("ColKind"),
"expedition": get_first("CollEventExpeditionName"),
"vessel": get_first("CollEventVesselName"),
"subDepartment": get_first("ColSubDepartment"),
"partType": get_first("PrtType"),
"registrationCode": get_first("RegCode"),
"kindOfObject": get_first("CatKindOfObject"),
"kindOfCollection": get_first("CatKindOfCollection"),
"collectionKind": get_first("ColKind"),
"collectionName": get_all("EntPriCollectionName"),
"donorName": get_all("PalAcqAccLotDonorFullName"),
"preparationType": get_all("DarPreparationType"),
"observedWeight": get_all("DarObservedWeight"),
"viceCounty": get_all("sumViceCountry"),
"extractionMethod": get_all("DnaExtractionMethod"),
"resuspendedIn": get_all("DnaReSuspendedIn"),
"totalVolume": get_all("DnaTotalVolume"),
"clutchSize": get_all("EggClutchSize"),
"setMark": get_all("EggSetMark"),
"nestShape": get_all("NesShape"),
"nestSite": get_all("NesSite"),
"populationCode": get_all("SilPopulationCode"),
"exsiccata": get_all("CollExsiccati"),
"exsiccataNumber": get_all("ColExsiccatiNumber"),
"labelLocality": get_all("ColSiteDescription"),
"donorName": get_first("PalAcqAccLotDonorFullName"),
"preparationType": get_first("DarPreparationType"),
"observedWeight": get_first("DarObservedWeight"),
"viceCounty": get_first("sumViceCountry"),
"extractionMethod": get_first("DnaExtractionMethod"),
"resuspendedIn": get_first("DnaReSuspendedIn"),
"totalVolume": get_first("DnaTotalVolume"),
"clutchSize": get_first("EggClutchSize"),
"setMark": get_first("EggSetMark"),
"nestShape": get_first("NesShape"),
"nestSite": get_first("NesSite"),
"populationCode": get_first("SilPopulationCode"),
"exsiccata": get_first("CollExsiccati"),
"exsiccataNumber": get_first("ColExsiccatiNumber"),
"labelLocality": get_first("ColSiteDescription"),
"plantDescription": get_all("ColPlantDescription"),
"catalogueDescription": get_all("PalDesDescription"),
"chronostratigraphy": get_all("PalStrChronostratLocal"),
"lithostratigraphy": get_all("PalStrLithostratLocal"),
"dateRegistered": get_all("MinDateRegistered"),
"identificationAsRegistered": get_all("MinIdentificationAsRegistered"),
"chronostratigraphy": get_first("PalStrChronostratLocal"),
"lithostratigraphy": get_first("PalStrLithostratLocal"),
"dateRegistered": get_first("MinDateRegistered"),
"identificationAsRegistered": get_first("MinIdentificationAsRegistered"),
"identificationDescription": get_all("MinIdentificationDescription"),
"occurrence": get_all("MinPetOccurance"),
"commodity": get_all("MinOreCommodity"),
"depositType": get_all("MinOreDepositType"),
"occurrence": get_first("MinPetOccurance"),
"commodity": get_first("MinOreCommodity"),
"depositType": get_first("MinOreDepositType"),
"texture": get_all("MinTextureStructure"),
"identificationVariety": get_all("MinIdentificationVariety"),
"identificationVariety": get_first("MinIdentificationVariety"),
"identificationOther": get_all("MinIdentificationOther"),
"hostRock": get_all("MinHostRock"),
"age": get_all("MinAgeDataAge"),
"ageType": get_all("MinAgeDataType"),
"tectonicProvince": get_all("MinNhmTectonicProvinceLocal"),
"mine": get_all("MinNhmStandardMineLocal"),
"miningDistrict": get_all("MinNhmMiningDistrictLocal"),
"mineralComplex": get_all("MinNhmComplexLocal"),
"geologyRegion": get_all("MinNhmRegionLocal"),
"meteoriteType": get_all("MinMetType"),
"meteoriteGroup": get_all("MinMetGroup"),
"chondriteAchondrite": get_all("MinMetChondriteAchondrite"),
"meteoriteClass": get_all("MinMetClass"),
"petrologyType": get_all("MinMetPetType"),
"petrologySubtype": get_all("MinMetPetSubtype"),
"recovery": get_all("MinMetRecoveryFindFall"),
"recoveryDate": get_all("MinMetRecoveryDate"),
"recoveryWeight": get_all("MinMetRecoveryWeight"),
"registeredWeight": get_all("MinMetWeightAsRegistered"),
"registeredWeightUnit": get_all("MinMetWeightAsRegisteredUnit"),
"determinationTypes": get_all("IdeCitationTypeStatus"),
"determinationNames": get_all("EntIdeScientificNameLocal"),
"determinationFiledAs": get_all("EntIdeFiledAs"),
"hostRock": get_first("MinHostRock"),
"age": get_first("MinAgeDataAge"),
"ageType": get_first("MinAgeDataType"),
"tectonicProvince": get_first("MinNhmTectonicProvinceLocal"),
"mine": get_first("MinNhmStandardMineLocal"),
"miningDistrict": get_first("MinNhmMiningDistrictLocal"),
"mineralComplex": get_first("MinNhmComplexLocal"),
"geologyRegion": get_first("MinNhmRegionLocal"),
"meteoriteType": get_first("MinMetType"),
"meteoriteGroup": get_first("MinMetGroup"),
"chondriteAchondrite": get_first("MinMetChondriteAchondrite"),
"meteoriteClass": get_first("MinMetClass"),
"petrologyType": get_first("MinMetPetType"),
"petrologySubtype": get_first("MinMetPetSubtype"),
"recovery": get_first("MinMetRecoveryFindFall"),
"recoveryDate": get_first("MinMetRecoveryDate"),
"recoveryWeight": get_first("MinMetRecoveryWeight"),
"registeredWeight": get_first("MinMetWeightAsRegistered"),
"registeredWeightUnit": get_first("MinMetWeightAsRegisteredUnit"),
"determinationTypes": get_all("IdeCitationTypeStatus", clean=False),
"determinationNames": get_all("EntIdeScientificNameLocal", clean=False),
"determinationFiledAs": get_all("EntIdeFiledAs", clean=False),
"project": get_all("NhmSecProjectName"),
}

0 comments on commit 433e9e1

Please sign in to comment.