Merge pull request #74 from ocefpaf/close_73
Implements a better return for multiple dataframes and caches the results.
ocefpaf authored Oct 13, 2023
2 parents 59ac88b + 81fdc17 commit 29284a3
Showing 2 changed files with 35 additions and 14 deletions.
36 changes: 24 additions & 12 deletions gliderpy/fetchers.py
@@ -3,6 +3,8 @@
 """
 
 import functools
+from copy import copy
+from typing import Optional
 
 import httpx
@@ -21,6 +23,23 @@
 _server = "https://gliders.ioos.us/erddap"
 
 
+@functools.lru_cache(maxsize=128)
+def _to_pandas_multiple(glider_grab):
+    """Thin wrapper to cache the results when multiple datasets are requested."""
+    df_all = {}
+    glider_grab_copy = copy(glider_grab)
+    for dataset_id in glider_grab_copy.datasets["Dataset ID"]:
+        glider_grab_copy.fetcher.dataset_id = dataset_id
+        df = glider_grab_copy.fetcher.to_pandas(
+            index_col="time (UTC)",
+            parse_dates=True,
+        )
+        dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0]
+        df = standardise_df(df, dataset_url)
+        df_all.update({dataset_id: df})
+    return df_all
+
+
 def standardise_df(df, dataset_url):
     """
     Standardise variable names in a dataset and add column for url
@@ -57,25 +76,18 @@ def to_pandas(self):
         """
         Fetches data from the server and reads into a pandas dataframe
-        :return: pandas dataframe with datetime UTC as index
+        :return: pandas dataframe with datetime UTC as index, multiple dataset_ids dataframes are stored in a dictionary
         """
         if self.fetcher.dataset_id:
             df = self.fetcher.to_pandas(
                 index_col="time (UTC)",
                 parse_dates=True,
             )
         elif not self.fetcher.dataset_id and self.datasets is not None:
-            df_all = []
-            for dataset_id in self.datasets["Dataset ID"]:
-                self.fetcher.dataset_id = dataset_id
-                df = self.fetcher.to_pandas(
-                    index_col="time (UTC)",
-                    parse_dates=True,
-                )
-                dataset_url = self.fetcher.get_download_url().split("?")[0]
-                df = standardise_df(df, dataset_url)
-                df_all.append(df)
-            return pd.concat(df_all)
+            df_all = _to_pandas_multiple(self)
+            # We need to reset to avoid fetching a single dataset_id when making multiple requests.
+            self.fetcher.dataset_id = None
+            return df_all
         else:
             raise ValueError(
                 f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.",
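Two details of the new _to_pandas_multiple wrapper are easy to miss: functools.lru_cache keys the cache on the glider_grab argument itself, so that object must be hashable, and the loop works on a copy so the instance serving as the cache key is never mutated mid-iteration. Here is a minimal sketch of the pattern; the Grabber class and fetch_all function are hypothetical stand-ins, not gliderpy code:

import functools
from copy import copy


class Grabber:
    """Stand-in for GliderDataFetcher; instances hash by identity."""

    def __init__(self, dataset_ids):
        self.dataset_ids = dataset_ids
        self.dataset_id = None


@functools.lru_cache(maxsize=128)
def fetch_all(grabber):
    """Fetch every dataset once; later calls with the same object hit the cache."""
    g = copy(grabber)  # mutate the copy, not the object used as the cache key
    results = {}
    for dataset_id in g.dataset_ids:
        g.dataset_id = dataset_id
        results[dataset_id] = f"data for {dataset_id}"  # pretend network fetch
    return results


g = Grabber(("ds-a", "ds-b"))
assert fetch_all(g) is fetch_all(g)  # the second call is served from the cache

One consequence of identity-based hashing (the default for plain Python objects) is that only repeated calls on the same fetcher hit the cache; a freshly constructed fetcher with identical query terms fetches again.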
13 changes: 11 additions & 2 deletions notebooks/00-quick_intro.ipynb
@@ -114,8 +114,17 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df = glider_grab.to_pandas()\n",
-    "df.head()"
+    "datasets = glider_grab.to_pandas()\n",
+    "datasets.keys()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "datasets[\"ru23-20121025T1944\"].head()"
+   ]
+  },
   {
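For readers following along outside the notebook, the changed cells boil down to the flow below. This is a hedged sketch: the query call and its argument order are assumed from the gliderpy quick-intro notebook, and the dataset ID is taken from the diff above.

from gliderpy.fetchers import GliderDataFetcher

glider_grab = GliderDataFetcher()
# Assumed signature: (min_lat, max_lat, min_lon, max_lon, min_time, max_time).
glider_grab.query(10, 40, -90, 8, "2010-01-01", "2013-06-02")

datasets = glider_grab.to_pandas()  # now a dict: {dataset_id: DataFrame}
print(datasets.keys())

df = datasets["ru23-20121025T1944"]  # one DataFrame per glider
print(df.head())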
