# Patch summary (leading text before the first `diff --git` header; patch tools
# skip it).
#
# gliderpy/fetchers.py
#   * Adds `import functools` and `from copy import copy`.
#   * Adds a module-level helper `_to_pandas_multiple(glider_grab)` decorated
#     with `functools.lru_cache(maxsize=128)`. It copies `glider_grab` with
#     `copy()`, loops over `datasets["Dataset ID"]`, sets `fetcher.dataset_id`
#     to each ID, downloads with
#     `fetcher.to_pandas(index_col="time (UTC)", parse_dates=True)`, passes the
#     frame and the download URL (everything before the first "?") to
#     `standardise_df`, and returns a dict mapping dataset ID -> DataFrame.
#   * `to_pandas`: the multi-dataset branch no longer builds a list and
#     returns `pd.concat(df_all)`; it calls `_to_pandas_multiple(self)`, sets
#     `self.fetcher.dataset_id = None`, and returns the dict. The `:return:`
#     docstring line is updated to mention the dictionary.
#
# notebooks/00-quick_intro.ipynb
#   * The existing cell now binds the result to `datasets` and shows
#     `datasets.keys()`; a new code cell shows
#     `datasets["ru23-20121025T1944"].head()`.
#
# NOTE(review): the multi-dataset branch changes its return type from a single
#   concatenated DataFrame to a dict of DataFrames; the single-dataset branch
#   presumably still returns a DataFrame (its return is outside these hunks) --
#   confirm callers handle both shapes.
# NOTE(review): `lru_cache` keys on the `glider_grab` argument, which must be
#   hashable. If the class relies on default identity hashing (not visible
#   here), changing the query on the same instance would still hit the cached
#   result -- confirm.
# NOTE(review): `copy()` is a shallow copy, so the copy presumably shares the
#   same `fetcher` object as the original; the reset of
#   `self.fetcher.dataset_id` after the call is consistent with that. The reset
#   is not in a `finally`, so a failed download would leave `dataset_id` set --
#   confirm whether that matters.
# NOTE(review): the ValueError message (unchanged context line) interpolates
#   `self.fetcher.dataset_id`, which is falsy whenever that branch runs; it
#   looks like the literal text `dataset_id` was intended -- confirm.
diff --git a/gliderpy/fetchers.py b/gliderpy/fetchers.py index ab547da..82b6a93 100644 --- a/gliderpy/fetchers.py +++ b/gliderpy/fetchers.py @@ -3,6 +3,8 @@ """ +import functools +from copy import copy from typing import Optional import httpx @@ -21,6 +23,23 @@ _server = "https://gliders.ioos.us/erddap" +@functools.lru_cache(maxsize=128) +def _to_pandas_multiple(glider_grab): + """Thin wrapper to cache the results when multiple datasets are requested.""" + df_all = {} + glider_grab_copy = copy(glider_grab) + for dataset_id in glider_grab_copy.datasets["Dataset ID"]: + glider_grab_copy.fetcher.dataset_id = dataset_id + df = glider_grab_copy.fetcher.to_pandas( + index_col="time (UTC)", + parse_dates=True, + ) + dataset_url = glider_grab_copy.fetcher.get_download_url().split("?")[0] + df = standardise_df(df, dataset_url) + df_all.update({dataset_id: df}) + return df_all + + def standardise_df(df, dataset_url): """ Standardise variable names in a dataset and add column for url @@ -57,7 +76,7 @@ def to_pandas(self): """ Fetches data from the server and reads into a pandas dataframe - :return: pandas dataframe with datetime UTC as index + :return: pandas dataframe with datetime UTC as index, multiple dataset_ids dataframes are stored in a dictionary """ if self.fetcher.dataset_id: df = self.fetcher.to_pandas( @@ -65,17 +84,10 @@ def to_pandas(self): parse_dates=True, ) elif not self.fetcher.dataset_id and self.datasets is not None: - df_all = [] - for dataset_id in self.datasets["Dataset ID"]: - self.fetcher.dataset_id = dataset_id - df = self.fetcher.to_pandas( - index_col="time (UTC)", - parse_dates=True, - ) - dataset_url = self.fetcher.get_download_url().split("?")[0] - df = standardise_df(df, dataset_url) - df_all.append(df) - return pd.concat(df_all) + df_all = _to_pandas_multiple(self) + # We need to reset to avoid fetching a single dataset_id when making multiple requests. 
+ self.fetcher.dataset_id = None + return df_all else: raise ValueError( f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.", diff --git a/notebooks/00-quick_intro.ipynb b/notebooks/00-quick_intro.ipynb index 3e05290..42920f5 100644 --- a/notebooks/00-quick_intro.ipynb +++ b/notebooks/00-quick_intro.ipynb @@ -114,8 +114,17 @@ "metadata": {}, "outputs": [], "source": [ - "df = glider_grab.to_pandas()\n", - "df.head()" + "datasets = glider_grab.to_pandas()\n", + "datasets.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "datasets[\"ru23-20121025T1944\"].head()" ] }, {