Skip to content

Commit

Permalink
enable all lints in ruff
Browse files Browse the repository at this point in the history
  • Loading branch information
ocefpaf committed Mar 6, 2024
1 parent 399041c commit 7213460
Show file tree
Hide file tree
Showing 8 changed files with 181 additions and 130 deletions.
10 changes: 2 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,6 @@ repos:
- id: file-contents-sorter
files: requirements-dev.txt

- repo: https://github.com/psf/black
rev: 24.1.1
hooks:
- id: black
language_version: python3

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.8.0
hooks:
Expand All @@ -33,7 +27,7 @@ repos:
- id: blackdoc

- repo: https://github.com/econchick/interrogate
rev: 1.5.0
rev: 237be78f9c6135fc1a620d211cdfdc5d3885082b
hooks:
- id: interrogate
exclude: ^(docs|tests)
Expand All @@ -56,7 +50,7 @@ repos:
- id: add-trailing-comma

- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.2.0
rev: v0.3.0
hooks:
- id: ruff

Expand Down
10 changes: 5 additions & 5 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@
# The master toctree document.
master_doc = "index"

import datetime

# Current copyright year. Use an explicit UTC timezone (ruff DTZ rules):
# naive datetime.now()/date.today() depend on the build machine's clock zone.
year = datetime.datetime.now(tz=datetime.timezone.utc).date().year

# General information about the project.
project = "gliderpy"
Expand All @@ -62,11 +62,11 @@
# |version| and |release|, also used in various other places throughout the
# built documents.
#
from gliderpy import __version__

# The short X.Y version shown by Sphinx.
version = __version__
# The full version, including alpha/beta/rc tags.
release = __version__

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
168 changes: 100 additions & 68 deletions gliderpy/fetchers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
"""
Helper methods to fetch glider data from multiple ERDDAP serves
"""
"""Helper methods to fetch glider data from multiple ERDDAP serves."""

import datetime
import functools
from copy import copy
from typing import Optional
from numbers import Number

import httpx
import pandas as pd
Expand All @@ -17,96 +15,110 @@
server_vars,
)

OptionalStr = Optional[str]
OptionalBool = bool | None
OptionalDict = dict | None
OptionalList = list[str] | tuple[str] | None
OptionalStr = str | None
OptionalNum = Number | None
# Should we add more or datetime.datetime catches all?
OptionalDateTime = datetime.datetime | str

# Defaults to the IOOS glider DAC.
_server = "https://gliders.ioos.us/erddap"


@functools.lru_cache(maxsize=128)
def _to_pandas_multiple(glider_grab: "GliderDataFetcher") -> dict:
    """Thin wrapper to cache results when multiple datasets are requested.

    :param glider_grab: a GliderDataFetcher whose query matched several
        dataset IDs (stored in ``glider_grab.datasets``).
    :return: mapping of dataset ID -> standardised pandas DataFrame.
        (The annotation was ``pd.DataFrame`` before, but a dict is what
        is actually returned.)
    """
    df_all = {}
    # Work on a copy so mutating fetcher.dataset_id below does not leak
    # into the caller's (cached-by-identity) object.
    glider_grab_copy = copy(glider_grab)
    for dataset_id in glider_grab_copy.datasets["Dataset ID"]:
        glider_grab_copy.fetcher.dataset_id = dataset_id
        glider_df = glider_grab_copy.fetcher.to_pandas()
        # Keep only the dataset's base URL, dropping the query string.
        dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0]
        glider_df = standardise_df(glider_df, dataset_url)
        df_all.update({dataset_id: glider_df})
    return df_all


def standardise_df(glider_df: pd.DataFrame, dataset_url: str) -> pd.DataFrame:
    """Standardise variable names in a dataset and add a URL column.

    :param glider_df: raw ERDDAP response for a single dataset.
    :param dataset_url: base URL of the dataset (query string removed).
    :return: dataframe indexed by parsed UTC time, with lower-cased and
        server-renamed columns and a ``dataset_url`` column appended.
    """
    glider_df.columns = glider_df.columns.str.lower()
    glider_df = glider_df.set_index("time (utc)")
    glider_df = glider_df.rename(columns=server_parameter_rename)
    glider_df.index = pd.to_datetime(glider_df.index)
    # We need to sort b/c of the non-sequential submission of files due to
    # the nature of glider data transmission.
    glider_df = glider_df.sort_index()
    glider_df["dataset_url"] = dataset_url
    return glider_df


class GliderDataFetcher:
"""
"""Instantiate the glider fetcher.
Args:
----
server: A glider ERDDAP server URL.
Attributes:
----------
dataset_id: A dataset unique id.
constraints: Download constraints, defaults same as query.
"""

def __init__(
    self: "GliderDataFetcher",
    server: OptionalStr = _server,
) -> None:
    """Instantiate main class attributes.

    :param server: a glider ERDDAP server URL; defaults to the IOOS
        glider DAC.
    """
    self.server = server
    self.fetcher = ERDDAP(
        server=server,
        protocol="tabledap",
    )
    # Variables this server exposes for glider datasets.
    self.fetcher.variables = server_vars[server]
    self.fetcher.dataset_id: OptionalStr = None
    # Populated by query() with a DataFrame of matching datasets — the
    # previous OptionalBool annotation was wrong (it is never a bool).
    self.datasets: pd.DataFrame | None = None

def to_pandas(self: "GliderDataFetcher") -> pd.DataFrame:
    """Return data from the server as a pandas dataframe.

    :return: a dataframe with datetime UTC as the index; when multiple
        dataset IDs matched the query, a dict of
        dataset_id -> dataframe is returned instead.
    :raises ValueError: if neither a dataset_id nor query terms were set.
    """
    if self.fetcher.dataset_id:
        glider_df = self.fetcher.to_pandas()
    elif self.datasets is not None:
        # (The old `not self.fetcher.dataset_id and` guard was redundant
        # here: this branch is only reached when dataset_id is falsy.)
        glider_df = _to_pandas_multiple(self)
        # We need to reset to avoid fetching a single dataset_id when
        # making multiple requests.
        self.fetcher.dataset_id = None
        return glider_df
    else:
        msg = (
            f"Must provide a {self.fetcher.dataset_id} or "
            "`query` terms to download data."
        )
        raise ValueError(msg)

    # Standardize variable names for the single dataset_id.
    dataset_url = self.fetcher.get_download_url().split("?")[0]
    return standardise_df(glider_df, dataset_url)

def query( # noqa: PLR0913
self: "GliderDataFetcher",
*,
min_lat: OptionalNum = None,
max_lat: OptionalNum = None,
min_lon: OptionalNum = None,
max_lon: OptionalNum = None,
min_time: OptionalDateTime = None,
max_time: OptionalDateTime = None,
delayed: OptionalBool = False,
) -> pd.DataFrame:
"""Add user supplied geographical and time constraints to the query.
:param min_lat: southernmost lat
:param max_lat: northernmost lat
Expand All @@ -116,7 +128,8 @@ def query(
:param max_time: end time, can be datetime object or string
:return: search query with argument constraints applied
"""
# FIXME: The time constrain could be better implemented by just dropping it instead.
# NB: The time constraint could be better implemented by just
# dropping it instead.
min_time = min_time if min_time else "1970-01-01"
max_time = max_time if max_time else "2038-01-19"
min_lat = min_lat if min_lat else -90.0
Expand Down Expand Up @@ -147,45 +160,64 @@ def query(
try:
data = urlopen(url)
except httpx.HTTPError as err:
raise Exception(
f"Error, no datasets found in supplied range. Try relaxing your constraints: {self.fetcher.constraints}",
) from err
return None
df = pd.read_csv(data)[["Title", "Institution", "Dataset ID"]]
msg = (
"Error, no datasets found in supplied range. "
f"Try relaxing the constraints: {self.fetcher.constraints}"
)
err.message = f"{err.message}\n{msg}"
raise

cols = ["Title", "Institution", "Dataset ID"]
datasets = pd.read_csv(data)[cols]
if not delayed:
df = df.loc[~df["Dataset ID"].str.endswith("delayed")]
datasets = datasets.loc[
~datasets["Dataset ID"].str.endswith("delayed")
]
info_urls = [
self.fetcher.get_info_url(dataset_id=dataset_id, response="html")
for dataset_id in df["Dataset ID"]
self.fetcher.get_info_url(
dataset_id=dataset_id,
response="html",
)
for dataset_id in datasets["Dataset ID"]
]
df["info_url"] = info_urls
self.datasets = df
datasets["info_url"] = info_urls
self.datasets = datasets
return self.datasets


class DatasetList:
    """Build a glider dataset ids list.

    Attributes
    ----------
    e: an ERDDAP server instance
    TODO -> search_terms: A list of terms to search the server for.
        Multiple terms will be combined as "AND."

    """

    def __init__(self: "DatasetList", server: OptionalStr = _server) -> None:
        """Instantiate main class attributes.

        :param server: the ERDDAP server URL (tabledap protocol is used).
        """
        self.e = ERDDAP(
            server=server,
            protocol="tabledap",
        )

    def get_ids(self: "DatasetList") -> list:
        """Return the allDatasets list for the glider server.

        :return: all dataset IDs except the "allDatasets" meta-entry.
        :raises ValueError: for servers other than the IOOS glider DAC,
            which is the only one known to expose the allDatasets table.
        """
        if self.e.server == "https://gliders.ioos.us/erddap":
            self.e.dataset_id = "allDatasets"
            dataset_ids = self.e.to_pandas()["datasetID"].to_list()
            # Drop the meta-entry that lists itself.
            dataset_ids.remove("allDatasets")
            self.dataset_ids = dataset_ids
            return self.dataset_ids
        # Grammar fix: was "does not supported this operation."
        msg = f"The {self.e.server} does not support this operation."
        raise ValueError(msg)
31 changes: 19 additions & 12 deletions gliderpy/plotters.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,30 @@
"""
Some convenience functions to help visualize glider data.
"""
"""Some convenience functions to help visualize glider data."""

from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

try:
import cartopy.crs as ccrs
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
except ModuleNotFoundError as err:
except ModuleNotFoundError:
warnings.warn(
"gliderpy requires matplotlib and cartopy for plotting.",
stacklevel=1,
)
raise err
raise


def plot_track(df):
"""
Plots a track of glider path coloured by temperature
if TYPE_CHECKING:
import pandas as pd

def plot_track(df: pd.DataFrame) -> tuple(plt.Figure, plt.Axes):
"""Plot a track of glider path coloured by temperature.
:return: figures, axes
"""

x = df["longitude (degrees_east)"]
y = df["latitude (degrees_north)"]
dx, dy = 2, 4
Expand All @@ -36,9 +39,13 @@ def plot_track(df):
return fig, ax


def plot_transect(df, var, **kw):
"""
Makes a scatter plot of depth vs time coloured by a user defined variable
def plot_transect(
df: pd.DataFrame,
var: str, **kw: dict,
) -> tuple(plt.Figure, plt.Axes):
"""Make a scatter plot of depth vs time coloured by a user defined
variable.
:param var: variable to colour the scatter plot
:return: figure, axes
"""
Expand Down
5 changes: 1 addition & 4 deletions gliderpy/servers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
"""
Server names and aliases that point to an ERDDAP instance
"""
"""Server names and aliases that point to an ERDDAP instance."""

server_vars = {
"https://gliders.ioos.us/erddap": [
Expand Down
Loading

0 comments on commit 7213460

Please sign in to comment.