Metadata Typing (#80)
* add types.py (originally from file-catalog-indexer)

* add __init__ and test

* check for members in test_00_types.py

* rename new modules

* rename new modules - 2

* rename new modules - 3

* git mv validation.py schema/validation.py

* add validation to __init__

* add schema.validation import test

* fix server.py Validation import
ric-evans authored Mar 1, 2021
1 parent bf76c98 commit 52e7671
Showing 5 changed files with 215 additions and 1 deletion.
3 changes: 3 additions & 0 deletions file_catalog/schema/__init__.py
@@ -0,0 +1,3 @@
"""Init."""

from . import types, validation
182 changes: 182 additions & 0 deletions file_catalog/schema/types.py
@@ -0,0 +1,182 @@
"""Metadata schema type hints."""

from typing import List, Optional

try:
    from typing import TypedDict
except ImportError:
    from typing_extensions import TypedDict

Date = str
EventID = int


class Checksum(TypedDict, total=False):
    """Checksum dict."""

    sha512: str


class LocationEntry(TypedDict, total=False):
    """Location entry."""

    site: str
    path: str
    archive: bool


class SoftwareEntry(TypedDict, total=False):
    """Software entry."""

    name: str
    version: str
    date: Date


class EventsData(TypedDict):
    """Events info."""

    first_event: Optional[int]
    last_event: Optional[int]
    event_count: int
    status: str


class Run(TypedDict):
    """Run dict."""

    run_number: int
    subrun_number: int
    part_number: int
    start_datetime: Optional[Date]  # ISO date
    end_datetime: Optional[Date]  # ISO date
    first_event: Optional[EventID]
    last_event: Optional[EventID]
    event_count: int


class GapEntry(TypedDict):
    """Gap dict."""

    start_event_id: EventID
    stop_event_id: EventID
    delta_time: float
    start_date: Date
    stop_date: Date


class Event(TypedDict):
    """Event entry."""

    event_id: EventID
    datetime: Date


class OfflineProcessingMetadata(TypedDict, total=False):
    """Offline Processing Metadata."""

    dataset_id: int
    season: Optional[int]
    season_name: Optional[str]
    L2_gcd_file: str
    L2_snapshot_id: int
    L2_production_version: int
    L3_source_dataset_id: int
    working_group: str
    validation_validated: bool
    validation_date: Date
    validation_software: SoftwareEntry
    livetime: Optional[float]
    gaps: Optional[List[GapEntry]]
    first_event: Optional[Event]
    last_event: Optional[Event]


class IceProdMetadata(TypedDict, total=True):
    """IceProd Metadata."""

    dataset: int
    dataset_id: str
    job: Optional[int]
    job_id: Optional[str]
    task: Optional[str]
    task_id: Optional[str]
    config: str


class SimulationMetadata(TypedDict, total=False):
    """Simulation Metadata."""

    generator: str
    composition: str
    geometry: str
    GCD_file: str
    bulk_ice_model: str
    hole_ice_model: str
    photon_propagator: str
    DOMefficiency: float
    atmosphere: int
    n_events: int
    oversampling: int
    DOMoversize: int
    energy_min: float
    energy_max: float
    power_law_index: str
    cylinder_length: float
    cylinder_radius: float
    zenith_min: float
    zenith_max: float
    hadronic_interaction: str


# ideally, we could do some kind of introspection, but this requires universal TypedDict support
simulation_metadata_types = {
    "generator": str,
    "composition": str,
    "geometry": str,
    "GCD_file": str,
    "bulk_ice_model": str,
    "hole_ice_model": str,
    "photon_propagator": str,
    "DOMefficiency": float,
    "atmosphere": int,
    "n_events": int,
    "oversampling": int,
    "DOMoversize": int,
    "energy_min": float,
    "energy_max": float,
    "power_law_index": str,
    "cylinder_length": float,
    "cylinder_radius": float,
    "zenith_min": float,
    "zenith_max": float,
    "hadronic_interaction": str,
}


class Metadata(TypedDict, total=False):
    """The file-catalog metadata.
    https://docs.google.com/document/d/14SanUWiYEbgarElt0YXSn_2We-rwT-ePO5Fg7rrM9lw/view#heading=h.yq8ukujsb797
    """

    # Basic File:
    logical_name: str
    locations: List[LocationEntry]
    file_size: int
    checksum: Checksum
    create_date: Date

    # i3 File:
    data_type: Optional[str]
    processing_level: Optional[str]
    content_status: str

    # /data/exp/* i3 File:
    software: Optional[List[SoftwareEntry]]
    run: Run
    offline_processing_metadata: OfflineProcessingMetadata

    # /data/sim/* i3 File:
    iceprod: IceProdMetadata
    simulation: SimulationMetadata
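
For orientation, here is a minimal usage sketch of these TypedDicts (illustrative only; the file names, site, and values below are hypothetical, not taken from the repository). A caller annotated this way gets static checking from a type checker such as mypy:

# Hypothetical usage sketch: a type checker verifies key names and value
# types against the Metadata, LocationEntry, and Checksum TypedDicts.
from file_catalog.schema.types import Checksum, LocationEntry, Metadata

record: Metadata = {
    "logical_name": "/data/exp/example/file.i3.zst",  # hypothetical path
    "locations": [
        LocationEntry(site="WIPAC", path="/mnt/example/file.i3.zst", archive=False)
    ],
    "file_size": 1024,
    "checksum": Checksum(sha512="0" * 128),  # placeholder digest
    "create_date": "2021-03-01T00:00:00",
}

# Because Metadata is declared with total=False, partial records like the
# one above type-check; a misspelled key or an int assigned to a str field
# would be flagged by the type checker.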
file_catalog/validation.py → file_catalog/schema/validation.py
File renamed without changes.
2 changes: 1 addition & 1 deletion file_catalog/server.py
@@ -36,7 +36,7 @@
import file_catalog
from file_catalog.mongo import Mongo
from file_catalog import urlargparse, argbuilder
-from file_catalog.validation import Validation
+from file_catalog.schema.validation import Validation

logger = logging.getLogger('server')

29 changes: 29 additions & 0 deletions tests/test_schema.py
@@ -0,0 +1,29 @@
"""Test schema."""


# local imports
from file_catalog import schema


def test_00_types() -> None:
    """Simply check imports."""
    type_dicts = [
        "Checksum",
        "LocationEntry",
        "SoftwareEntry",
        "EventsData",
        "Run",
        "GapEntry",
        "Event",
        "OfflineProcessingMetadata",
        "IceProdMetadata",
        "SimulationMetadata",
        "Metadata",
    ]
    for type_dict_class in type_dicts:
        assert type_dict_class in dir(schema.types)


def test_01_validation() -> None:
    """Simply check imports."""
    assert "Validation" in dir(schema.validation)
