Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

esm_datastore version 2 #359

Closed
wants to merge 13 commits into from
1 change: 1 addition & 0 deletions intake_esm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pkg_resources import DistributionNotFound, get_distribution

from .core import esm_datastore
from .main import esm_datastore_v2
from .utils import show_versions

try:
Expand Down
122 changes: 108 additions & 14 deletions intake_esm/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,37 @@ class AggregationType(str, enum.Enum):
join_existing = 'join_existing'
union = 'union'

class Config:
validate_all = True
validate_assignment = True


class DataFormat(str, enum.Enum):
    """Closed set of asset data formats.

    Members are also ``str`` instances, so they compare equal to their plain
    string values (``DataFormat.zarr == 'zarr'``).
    """

    # NOTE: deliberately no pydantic-style ``class Config`` in this body. This
    # is an ``enum.Enum``, not a pydantic model: on Python < 3.13 a class
    # defined inside an Enum body is turned into an enum *member*, which would
    # add a bogus ``Config`` entry to the set of permitted formats.
    netcdf = 'netcdf'
    zarr = 'zarr'


class Attribute(pydantic.BaseModel):
    """Pydantic model pairing a column name with an optional vocabulary string."""

    # Strict string: non-``str`` input is rejected rather than coerced.
    column_name: pydantic.StrictStr
    # Strict string as well; defaults to the empty string when omitted.
    vocabulary: pydantic.StrictStr = ''

    class Config:
        # Re-run validation whenever an attribute is assigned after creation,
        # and validate default values too.
        validate_assignment = True
        validate_all = True


class Assets(pydantic.BaseModel):
column_name: str
column_name: pydantic.StrictStr
format: DataFormat
format_column_name: typing.Optional[str]
format_column_name: typing.Optional[pydantic.StrictStr]

class Config:
validate_all = True
validate_assignment = True

@pydantic.root_validator
def _validate_data_format(cls, values):
Expand All @@ -36,30 +52,50 @@ def _validate_data_format(cls, values):

class Aggregation(pydantic.BaseModel):
    """Pydantic model for one aggregation: its type, target attribute and options."""

    # Must be one of the ``AggregationType`` members.
    type: AggregationType
    # Strict string: non-``str`` input is rejected rather than coerced.
    attribute_name: pydantic.StrictStr
    # Free-form option mapping; defaults to an empty dict.
    options: typing.Optional[typing.Dict] = {}

    class Config:
        # Re-run validation whenever an attribute is assigned after creation,
        # and validate default values too.
        validate_assignment = True
        validate_all = True


class AggregationControl(pydantic.BaseModel):
    """Pydantic model holding the variable column, group-by attributes and aggregations."""

    variable_column_name: pydantic.StrictStr
    groupby_attrs: typing.List[pydantic.StrictStr]
    # Defaults to no aggregations; always stored ordered by ``type`` (see validator).
    aggregations: typing.List[Aggregation] = []

    class Config:
        # Re-run validation whenever an attribute is assigned after creation,
        # and validate default values too.
        validate_assignment = True
        validate_all = True

    @pydantic.validator('aggregations')
    def _validate_aggregations(cls, values):
        """Return the aggregations ordered by ascending ``type``."""
        # Sort a copy so the incoming list is left untouched; ``list.sort`` is
        # stable, so entries with the same type keep their relative order.
        ordered = list(values)
        ordered.sort(key=lambda aggregation: aggregation.type)
        return ordered


class ESMCatalogModel(pydantic.BaseModel):
"""
Pydantic model for the ESM data catalog defined in https://git.io/JBWoW
"""

esmcat_version: str
esmcat_version: pydantic.StrictStr
id: str
attributes: typing.List[Attribute]
assets: Assets
aggregation_control: AggregationControl
catalog_dict: typing.Optional[typing.List[typing.Dict]] = None
catalog_file: str = None
description: str = None
title: str = None
catalog_file: pydantic.StrictStr = None
description: pydantic.StrictStr = None
title: pydantic.StrictStr = None

class Config:
validate_all = True
validate_assignment = True

@pydantic.root_validator
def validate_catalog(cls, values):
Expand All @@ -70,15 +106,73 @@ def validate_catalog(cls, values):
return values

@classmethod
def load_catalog_file(
def load_json_file(
cls,
catalog_file: typing.Union[str, pydantic.FilePath, pydantic.AnyUrl],
json_file: typing.Union[str, pydantic.FilePath, pydantic.AnyUrl],
storage_options=None,
) -> 'ESMCatalogModel':
"""
Loads the catalog from a file
"""
storage_options = storage_options if storage_options is not None else {}

with fsspec.open(catalog_file, **storage_options) as fobj:
with fsspec.open(json_file, **storage_options) as fobj:
return cls.parse_raw(fobj.read())


class ESMSingleDataSourceModel(pydantic.BaseModel):
    """Pydantic model for a data source described by a single record."""

    # Strict string: non-``str`` input is rejected rather than coerced.
    key: pydantic.StrictStr
    # One mapping with string keys and arbitrary values.
    record: typing.Dict[str, typing.Any]
    esmcat: ESMCatalogModel
    # Extra keyword arguments; every instance gets its own fresh empty dict.
    kwargs: typing.Dict[str, typing.Any] = pydantic.Field(default_factory=dict)

    class Config:
        # Re-run validation whenever an attribute is assigned after creation,
        # and validate default values too.
        validate_assignment = True
        validate_all = True


class ESMGroupedDataSourceModel(pydantic.BaseModel):
    """Pydantic model for a data source described by a list of records."""

    # Strict string: non-``str`` input is rejected rather than coerced.
    key: pydantic.StrictStr
    # A list of mappings, each with string keys and arbitrary values.
    records: typing.List[typing.Dict[str, typing.Any]]
    esmcat: ESMCatalogModel
    # Extra keyword arguments; every instance gets its own fresh empty dict.
    kwargs: typing.Dict[str, typing.Any] = pydantic.Field(default_factory=dict)

    class Config:
        # Re-run validation whenever an attribute is assigned after creation,
        # and validate default values too.
        validate_assignment = True
        validate_all = True


class QueryModel(pydantic.BaseModel):
    """Pydantic model for a search query checked against a list of column names.

    ``query`` maps column names to a value or a list of values, ``columns``
    lists the valid column names, and ``require_all_on`` optionally names one
    or more columns (a single string is normalized to a one-element list).
    """

    query: typing.Dict[pydantic.StrictStr, typing.Union[typing.Any, typing.List[typing.Any]]]
    columns: typing.List[str]
    require_all_on: typing.Optional[typing.Union[str, typing.List[typing.Any]]] = None

    class Config:
        validate_all = True
        validate_assignment = True

    @pydantic.root_validator(pre=False)
    def validate_query(cls, values):
        """Check that every column named by the query exists in ``columns``.

        A string ``require_all_on`` is replaced by a one-element list. Raises
        ``ValueError`` when a ``query`` key or a ``require_all_on`` entry is not
        present in ``columns``.
        """
        columns = values.get('columns')
        if columns is None:
            # A field that failed its own validation is missing from `values`.
            # That failure is reported separately, and without `columns` there
            # is nothing to check the other fields against.
            return values

        # `or {}` also covers a `query` that failed its own validation.
        for key in values.get('query') or {}:
            if key not in columns:
                raise ValueError(f'Column {key} not in columns {columns}')

        # Work on a local so a missing 'require_all_on' key (field validation
        # failed) can never surface as a raw KeyError from this validator.
        require_all_on = values.get('require_all_on')
        if isinstance(require_all_on, str):
            require_all_on = [require_all_on]
            values['require_all_on'] = require_all_on

        for key in require_all_on or []:
            if key not in columns:
                raise ValueError(f'Column {key} not in columns {columns}')
        return values

    def normalize_query(self) -> typing.Dict[pydantic.StrictStr, typing.List[typing.Any]]:
        """Return a copy of ``query`` with scalar values wrapped in a list.

        Strings, bytes, numbers, booleans and ``None`` become one-element
        lists; every other value (lists and other containers) is passed
        through unchanged. ``self.query`` itself is not modified.
        """
        scalar_types = (str, bytes, int, float, bool)
        return {
            key: [value] if value is None or isinstance(value, scalar_types) else value
            for key, value in self.query.items()
        }
Loading