From cc4fe09b8bbe1f330558a6590482931b24ecd911 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 1 Feb 2024 09:58:24 -0300 Subject: [PATCH 1/4] update pre-commits --- .pre-commit-config.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9e63ce..a2ff450 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: files: requirements-dev.txt - repo: https://github.com/psf/black - rev: 23.12.1 + rev: 24.1.1 hooks: - id: black language_version: python3 @@ -33,10 +33,10 @@ repos: - id: blackdoc - repo: https://github.com/econchick/interrogate - rev: 1.5.0 + rev: 237be78f9c6135fc1a620d211cdfdc5d3885082b hooks: - id: interrogate - exclude: ^(docs|setup.py|tests) + exclude: ^(docs|tests) args: [--config=pyproject.toml] - repo: https://github.com/codespell-project/codespell @@ -56,12 +56,12 @@ repos: - id: add-trailing-comma - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.9 + rev: v0.1.15 hooks: - id: ruff - repo: https://github.com/tox-dev/pyproject-fmt - rev: 1.5.3 + rev: 1.7.0 hooks: - id: pyproject-fmt From 3189496240664c7efda636472409df2931f11b3e Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 1 Feb 2024 10:45:00 -0300 Subject: [PATCH 2/4] minor refactoring and sort index --- gliderpy/fetchers.py | 19 ++++++++----------- gliderpy/servers.py | 2 +- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/gliderpy/fetchers.py b/gliderpy/fetchers.py index f5b0f83..c47df35 100644 --- a/gliderpy/fetchers.py +++ b/gliderpy/fetchers.py @@ -30,10 +30,7 @@ def _to_pandas_multiple(glider_grab): glider_grab_copy = copy(glider_grab) for dataset_id in glider_grab_copy.datasets["Dataset ID"]: glider_grab_copy.fetcher.dataset_id = dataset_id - df = glider_grab_copy.fetcher.to_pandas( - index_col="time (UTC)", - parse_dates=True, - ) + df = glider_grab_copy.fetcher.to_pandas() dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0] df = standardise_df(df, dataset_url) df_all.update({dataset_id: df}) @@ -45,8 +42,11 @@ def standardise_df(df, dataset_url): Standardise variable names in a dataset and add column for url """ df.columns = df.columns.str.lower() - df.rename(columns=dict(server_parameter_rename), inplace=True) - df.index.rename("time", inplace=True) + df = df.set_index("time (utc)") + df = df.rename(columns=server_parameter_rename) + df.index = pd.to_datetime(df.index) + # We need to sort b/c of the non-sequential submission of files due to the nature of glider data transmission. + df = df.sort_index() df["dataset_url"] = dataset_url return df @@ -79,10 +79,7 @@ def to_pandas(self): :return: pandas dataframe with datetime UTC as index, multiple dataset_ids dataframes are stored in a dictionary """ if self.fetcher.dataset_id: - df = self.fetcher.to_pandas( - index_col="time (UTC)", - parse_dates=True, - ) + df = self.fetcher.to_pandas() elif not self.fetcher.dataset_id and self.datasets is not None: df_all = _to_pandas_multiple(self) # We need to reset to avoid fetching a single dataset_id when making multiple requests. @@ -93,7 +90,7 @@ def to_pandas(self): f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.", ) - # Standardize variable names. + # Standardize variable names for the single dataset_id. dataset_url = self.fetcher.get_download_url().split("?")[0] df = standardise_df(df, dataset_url) return df diff --git a/gliderpy/servers.py b/gliderpy/servers.py index b33d3de..eae8b4f 100644 --- a/gliderpy/servers.py +++ b/gliderpy/servers.py @@ -3,7 +3,6 @@ """ - server_vars = { "https://gliders.ioos.us/erddap": [ "latitude", @@ -30,4 +29,5 @@ "salinity (1)": "salinity", "temp (degree_celsius)": "temperature", "temperature (celsius)": "temperature", + "time (utc)": "time", } From 91ba0dd8ffa5a6ae1df079c81b20570d8803446b Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 1 Feb 2024 10:46:24 -0300 Subject: [PATCH 3/4] remove duplicated key --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 23b6634..7223fce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,6 @@ ignore = [ "tests", "tests/*", ] -## [tool.pytest.ini_options] filterwarnings = [ From 70bc748e8375943aa1093a6294f0215522b3355d Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Thu, 1 Feb 2024 11:02:16 -0300 Subject: [PATCH 4/4] add pyarrow --- requirements-dev.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 77d68bc..c4c6685 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,3 +1,5 @@ +# Pyarrow will be required in pandas 3.0, +# added here for better performance and to avoid a deprecation warning. cartopy check-manifest jupyter @@ -6,6 +8,7 @@ nbconvert nbsphinx palettable pre-commit +pyarrow pytest pytest-cov pytest-flake8