diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9587066..3d95ad4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,12 +13,6 @@ repos: - id: file-contents-sorter files: requirements-dev.txt -- repo: https://github.com/psf/black - rev: 24.1.1 - hooks: - - id: black - language_version: python3 - - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.8.0 hooks: @@ -33,7 +27,7 @@ repos: - id: blackdoc - repo: https://github.com/econchick/interrogate - rev: 1.5.0 + rev: 237be78f9c6135fc1a620d211cdfdc5d3885082b hooks: - id: interrogate exclude: ^(docs|tests) @@ -56,7 +50,7 @@ repos: - id: add-trailing-comma - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.2.0 + rev: v0.3.0 hooks: - id: ruff diff --git a/docs/source/conf.py b/docs/source/conf.py index 0016b59..24378e1 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -49,9 +49,9 @@ # The master toctree document. master_doc = "index" -from datetime import date +import datetime -year = date.today().year +year = datetime.datetime.now(tz=datetime.timezone.utc).date().year # General information about the project. project = "gliderpy" @@ -62,11 +62,11 @@ # |version| and |release|, also used in various other places throughout the # built documents. # -from gliderpy import __version__ as VERSION # noqa +from gliderpy import __version__ -version = VERSION +version = __version__ # The full version, including alpha/beta/rc tags. -release = VERSION +release = __version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/gliderpy/fetchers.py b/gliderpy/fetchers.py index c47df35..560b806 100644 --- a/gliderpy/fetchers.py +++ b/gliderpy/fetchers.py @@ -1,11 +1,9 @@ -""" -Helper methods to fetch glider data from multiple ERDDAP serves - -""" +"""Helper methods to fetch glider data from multiple ERDDAP servers.""" +import datetime import functools from copy import copy -from typing import Optional +from numbers import Number import httpx import pandas as pd @@ -17,52 +15,64 @@ server_vars, ) -OptionalStr = Optional[str] +OptionalBool = bool | None +OptionalDict = dict | None +OptionalList = list[str] | tuple[str] | None +OptionalStr = str | None +OptionalNum = Number | None +# Should we add more or datetime.datetime catches all? +OptionalDateTime = datetime.datetime | str # Defaults to the IOOS glider DAC. _server = "https://gliders.ioos.us/erddap" @functools.lru_cache(maxsize=128) -def _to_pandas_multiple(glider_grab): - """Thin wrapper to cache the results when multiple datasets are requested.""" +def _to_pandas_multiple(glider_grab: "GliderDataFetcher") -> pd.DataFrame: + """Thin wrapper to cache results when multiple datasets are requested.""" df_all = {} glider_grab_copy = copy(glider_grab) for dataset_id in glider_grab_copy.datasets["Dataset ID"]: glider_grab_copy.fetcher.dataset_id = dataset_id - df = glider_grab_copy.fetcher.to_pandas() + glider_df = glider_grab_copy.fetcher.to_pandas() dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0] - df = standardise_df(df, dataset_url) - df_all.update({dataset_id: df}) + glider_df = standardise_df(glider_df, dataset_url) + df_all.update({dataset_id: glider_df}) return df_all -def standardise_df(df, dataset_url): - """ - Standardise variable names in a dataset and add column for url - """ - df.columns = df.columns.str.lower() - df = df.set_index("time (utc)") - df = df.rename(columns=server_parameter_rename) - df.index = pd.to_datetime(df.index) - # We need to sort b/c of the non-sequential submission of 
files due to the nature of glider data transmission. - df = df.sort_index() - df["dataset_url"] = dataset_url - return df +def standardise_df(glider_df: pd.DataFrame, dataset_url: str) -> pd.DataFrame: + """Standardise variable names in a dataset and add column for URL.""" + glider_df.columns = glider_df.columns.str.lower() + glider_df = glider_df.set_index("time (utc)") + glider_df = glider_df.rename(columns=server_parameter_rename) + glider_df.index = pd.to_datetime(glider_df.index) + # We need to sort b/c of the non-sequential submission of files due to + # the nature of glider data transmission. + glider_df = glider_df.sort_index() + glider_df["dataset_url"] = dataset_url + return glider_df class GliderDataFetcher: - """ + """Instantiate the glider fetcher. + Args: + ---- server: A glider ERDDAP server URL. Attributes: + ---------- dataset_id: A dataset unique id. constraints: Download constraints, defaults same as query. """ - def __init__(self, server=_server): + def __init__( + self: "GliderDataFetcher", + server: OptionalStr = _server, + ) -> None: + """Instantiate main class attributes.""" self.server = server self.fetcher = ERDDAP( server=server, @@ -70,43 +80,45 @@ def __init__(self, server=_server): ) self.fetcher.variables = server_vars[server] self.fetcher.dataset_id: OptionalStr = None - self.datasets: Optional = None + self.datasets: OptionalBool = None - def to_pandas(self): - """ - Fetches data from the server and reads into a pandas dataframe + def to_pandas(self: "GliderDataFetcher") -> pd.DataFrame: + """Return data from the server as a pandas dataframe. 
- :return: pandas dataframe with datetime UTC as index, multiple dataset_ids dataframes are stored in a dictionary + :return: a pandas dataframe with datetime UTC as index, + multiple dataset_ids dataframes are stored in a dictionary """ if self.fetcher.dataset_id: - df = self.fetcher.to_pandas() + glider_df = self.fetcher.to_pandas() elif not self.fetcher.dataset_id and self.datasets is not None: - df_all = _to_pandas_multiple(self) - # We need to reset to avoid fetching a single dataset_id when making multiple requests. + glider_df = _to_pandas_multiple(self) + # We need to reset to avoid fetching a single dataset_id when + # making multiple requests. self.fetcher.dataset_id = None - return df_all + return glider_df else: - raise ValueError( - f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.", + msg = ( + f"Must provide a {self.fetcher.dataset_id} or " + "`query` terms to download data." ) + raise ValueError(msg) # Standardize variable names for the single dataset_id. dataset_url = self.fetcher.get_download_url().split("?")[0] - df = standardise_df(df, dataset_url) - return df - - def query( - self, - min_lat=None, - max_lat=None, - min_lon=None, - max_lon=None, - min_time=None, - max_time=None, - delayed=False, - ): - """ - Takes user supplied geographical and time constraints and adds them to the query + return standardise_df(glider_df, dataset_url) + + def query( # noqa: PLR0913 + self: "GliderDataFetcher", + *, + min_lat: OptionalNum = None, + max_lat: OptionalNum = None, + min_lon: OptionalNum = None, + max_lon: OptionalNum = None, + min_time: OptionalDateTime = None, + max_time: OptionalDateTime = None, + delayed: OptionalBool = False, + ) -> pd.DataFrame: + """Add user supplied geographical and time constraints to the query. 
:param min_lat: southernmost lat :param max_lat: northermost lat @@ -116,7 +128,8 @@ def query( :param max_time: end time, can be datetime object or string :return: search query with argument constraints applied """ - # FIXME: The time constrain could be better implemented by just dropping it instead. + # NB: The time constraint could be better implemented by just + # dropping it instead. min_time = min_time if min_time else "1970-01-01" max_time = max_time if max_time else "2038-01-19" min_lat = min_lat if min_lat else -90.0 @@ -147,19 +160,28 @@ def query( try: data = urlopen(url) except httpx.HTTPError as err: - raise Exception( - f"Error, no datasets found in supplied range. Try relaxing your constraints: {self.fetcher.constraints}", - ) from err - return None - df = pd.read_csv(data)[["Title", "Institution", "Dataset ID"]] + msg = ( + "Error, no datasets found in supplied range. " + f"Try relaxing the constraints: {self.fetcher.constraints}" + ) + err.message = f"{err.message}\n{msg}" + raise + + cols = ["Title", "Institution", "Dataset ID"] + datasets = pd.read_csv(data)[cols] if not delayed: - df = df.loc[~df["Dataset ID"].str.endswith("delayed")] + datasets = datasets.loc[ + ~datasets["Dataset ID"].str.endswith("delayed") + ] info_urls = [ - self.fetcher.get_info_url(dataset_id=dataset_id, response="html") - for dataset_id in df["Dataset ID"] + self.fetcher.get_info_url( + dataset_id=dataset_id, + response="html", + ) + for dataset_id in datasets["Dataset ID"] ] - df["info_url"] = info_urls - self.datasets = df + datasets["info_url"] = info_urls + self.datasets = datasets return self.datasets @@ -167,19 +189,29 @@ class DatasetList: """Build a glider dataset ids list. - Attributes: + Attributes + ---------- e: an ERDDAP server instance - TODO: search_terms: A list of terms to search the server for. Multiple terms will be combined as "AND." + TODO -> search_terms: A list of terms to search the server for. + Multiple terms will be combined as "AND." 
""" - def __init__(self, server=_server): + def __init__(self: "DatasetList", server: OptionalStr = _server) -> None: + """Instantiate main class attributes. + + Attributes + ---------- + server: the server URL. + protocol: ERDDAP's protocol (tabledap/griddap) + + """ self.e = ERDDAP( server=server, protocol="tabledap", ) - def get_ids(self): + def get_ids(self: "DatasetList") -> list: """Return the allDatasets list for the glider server.""" if self.e.server == "https://gliders.ioos.us/erddap": self.e.dataset_id = "allDatasets" @@ -187,5 +219,5 @@ def get_ids(self): dataset_ids.remove("allDatasets") self.dataset_ids = dataset_ids return self.dataset_ids - else: - raise ValueError(f"The {self.e.server} does not supported this operation.") + msg = f"The {self.e.server} does not supported this operation." + raise ValueError(msg) diff --git a/gliderpy/plotters.py b/gliderpy/plotters.py index 5c81ed1..0b642cd 100644 --- a/gliderpy/plotters.py +++ b/gliderpy/plotters.py @@ -1,27 +1,30 @@ -""" -Some convenience functions to help visualize glider data. -""" +"""Some convenience functions to help visualize glider data.""" + +from __future__ import annotations import warnings +from typing import TYPE_CHECKING try: import cartopy.crs as ccrs import matplotlib.dates as mdates import matplotlib.pyplot as plt -except ModuleNotFoundError as err: +except ModuleNotFoundError: warnings.warn( "gliderpy requires matplotlib and cartopy for plotting.", stacklevel=1, ) - raise err + raise -def plot_track(df): - """ - Plots a track of glider path coloured by temperature +if TYPE_CHECKING: + import pandas as pd + +def plot_track(df: pd.DataFrame) -> tuple(plt.Figure, plt.Axes): + """Plot a track of glider path coloured by temperature. 
+ :return: figures, axes """ - x = df["longitude (degrees_east)"] y = df["latitude (degrees_north)"] dx, dy = 2, 4 @@ -36,9 +39,13 @@ def plot_track(df): return fig, ax -def plot_transect(df, var, **kw): - """ - Makes a scatter plot of depth vs time coloured by a user defined variable +def plot_transect( + df: pd.DataFrame, + var: str, **kw: dict, +) -> tuple[plt.Figure, plt.Axes]: + """Make a scatter plot of depth vs time coloured by a user defined + variable. + + :param var: variable to colour the scatter plot :return: figure, axes """ diff --git a/gliderpy/servers.py b/gliderpy/servers.py index eae8b4f..b3c2f22 100644 --- a/gliderpy/servers.py +++ b/gliderpy/servers.py @@ -1,7 +1,4 @@ -""" -Server names and aliases that point to an ERDDAP instance - -""" +"""Server names and aliases that point to an ERDDAP instance.""" server_vars = { "https://gliders.ioos.us/erddap": [ diff --git a/pyproject.toml b/pyproject.toml index 7223fce..2c5543e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,11 +16,9 @@ maintainers = [ {name = "Lindsay Abrams"}, {name = "Filipe Fernandes"}, ] -requires-python = ">=3.8" +requires-python = ">=3.10" classifiers = [ "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", @@ -47,33 +45,6 @@ write_to = "gliderpy/_version.py" write_to_template = "__version__ = '{version}'" tag_regex = "^(?Pv)?(?P[^\\+]+)(?P.*)?$" -[tool.ruff] -select = [ - "A", # flake8-builtins - "B", # flake8-bugbear - "C4", # flake8-comprehensions - "F", # flakes - "I", # import sorting - "T20", # flake8-print - "UP", # upgrade - "ERA", # flake8-eradicate/eradicate (remove commented out code) - "PIE", # flake8-pie (misc lints) - "SIM", # flake8-simplify (reduce code complexity) - "TID", # flake8-tidy-imports - "TCH", # flake8-type-checking - "N", # pep8-naming - "RUF", 
# Ruff-specific rules -] -target-version = "py38" -line-length = 79 - -[tool.ruff.per-file-ignores] -"docs/source/conf.py" = [ - "E402", - "A001", - "ERA001", -] - [tool.check-manifest] ignore = [ "*.yml", diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 0000000..3f04dbc --- /dev/null +++ b/ruff.toml @@ -0,0 +1,44 @@ +line-length = 79 + +lint.select = ["ALL"] + +lint.ignore = [ + "D203", # 1 blank line required before class docstring + "D205", # 1 blank line required between summary line and description + "D213", # incompatible. Ignoring `multi-line-summary-second-line` + "TRY003", # Avoid specifying long messages outside the exception class +] + +[lint.extend-per-file-ignores] +"docs/source/conf.py" = [ + "A001", # builtin-variable-shadowing + "D100", # Missing docstring in public module + "E402", # Module level import not at top of file + "ERA001", # Found commented-out code + "ERA001", # Found commented-out code + "EXE001", # Shebang is present but file is not executable +] +"test_*.py" = [ + "ANN001", # Missing type annotation for function argument + "ANN201", # Missing return type annotation for public function + "ANN202", # Missing return type annotation for private function + "INP001", # File is part of an implicit namespace package + "PD901", # Avoid using the generic variable name `df` for DataFrames + "S101", # Use of assert detected +] +# nbqa-ruff acts on converted .py so we cannot glob .ipynb :-/ +# https://github.com/nbQA-dev/nbQA/issues/823 +"notebooks/*" = [ + "ANN001", # Missing type annotation for function argument + "ANN201", # Missing return type annotation for public function + "B018", # Found useless expression. 
Either assign it to a variable or remove it + "D100", # Missing docstring in public module + "D103", # Missing docstring in public function + "E402", # Module level import not at top of file + "FBT003", # Boolean positional value in function call + "INP001", # File is part of an implicit namespace package + "PD901", # Avoid using the generic variable name `df` for DataFrames + "T201", # `print` found +] +[lint.pycodestyle] +max-doc-length = 180 diff --git a/tests/test_fetchers.py b/tests/test_fetchers.py index c64a3a4..5f65101 100644 --- a/tests/test_fetchers.py +++ b/tests/test_fetchers.py @@ -1,18 +1,23 @@ +"""Test Fetchers.""" + + import pytest from gliderpy.fetchers import GliderDataFetcher from gliderpy.servers import server_parameter_rename -@pytest.fixture -@pytest.mark.web +@pytest.fixture() +@pytest.mark.web() def glider_grab(): + """Create the basic query object for testing.""" g = GliderDataFetcher() g.dataset_id = "whoi_406-20160902T1700" - yield g + return g def test_variables(glider_grab): + """Check if expected variables are being fetched.""" expected = [ "latitude", "longitude", @@ -26,6 +31,7 @@ def test_variables(glider_grab): def test_standardise_variables_ioos(): + """Check if IOOS variables are properly renamed.""" glider_grab = GliderDataFetcher() glider_grab.fetcher.dataset_id = "whoi_406-20160902T1700" df = glider_grab.to_pandas()