From d0fa8808c22b09931ba9803efdd59d84b666c3bd Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:21:01 -0300 Subject: [PATCH 01/10] use a single server and add info URL to the fetcher results --- gliderpy/fetchers.py | 55 ++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/gliderpy/fetchers.py b/gliderpy/fetchers.py index 5d52167..1725aaf 100644 --- a/gliderpy/fetchers.py +++ b/gliderpy/fetchers.py @@ -12,13 +12,12 @@ from gliderpy.servers import ( server_parameter_rename, - server_select, server_vars, ) OptionalStr = Optional[str] -# This defaults to the IOOS glider DAC. +# Defaults to the IOOS glider DAC. _server = "https://gliders.ioos.us/erddap" @@ -36,16 +35,15 @@ def standardise_df(df, dataset_url): class GliderDataFetcher: """ Args: - server: a glider ERDDAP server URL + server: A glider ERDDAP server URL. Attributes: - dataset_id: a dataset unique id. - constraints: download constraints, default + dataset_id: A dataset unique id. + constraints: Download constraints, defaults same as query. """ def __init__(self, server=_server): - server = server_select(server) self.server = server self.fetcher = ERDDAP( server=server, @@ -81,12 +79,21 @@ def to_pandas(self): index_col="time (UTC)", parse_dates=True, ) - # Standardize variable names + # Standardize variable names. dataset_url = self.fetcher.get_download_url().split("?")[0] df = standardise_df(df, dataset_url) return df - def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time): + def query( + self, + min_lat, + max_lat, + min_lon, + max_lon, + min_time, + max_time, + delayed=False, + ): """ Takes user supplied geographical and time constraints and adds them to the query @@ -106,7 +113,7 @@ def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time): "longitude>=": min_lon, "longitude<=": max_lon, } - if not self.fetcher.dataset_id: + if not self.datasets: url = self.fetcher.get_search_url( search_for="glider", response="csv", @@ -117,6 +124,7 @@ def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time): min_time=min_time, max_time=max_time, ) + self.query_url = url try: data = urlopen(url) except httpx.HTTPError as err: @@ -124,20 +132,16 @@ def query(self, min_lat, max_lat, min_lon, max_lon, min_time, max_time): f"Error, no datasets found in supplied range. Try relaxing your constraints: {self.fetcher.constraints}", ) from err return None - df = pd.read_csv(data) - self.datasets = df["Dataset ID"] - return df[["Title", "Institution", "Dataset ID"]] - - return self - - def platform(self, platform): - """ - - :param platform: platform and deployment id from ifremer - :return: search query with platform constraint applied - """ - self.fetcher.constraints["platform_deployment="] = platform - return self + df = pd.read_csv(data)[["Title", "Institution", "Dataset ID"]] + if not delayed: + df = df.loc[~df["Dataset ID"].str.endswith("delayed")] + info_urls = [ + self.fetcher.get_info_url(dataset_id=dataset_id, response="html") + for dataset_id in df["Dataset ID"] + ] + df["info_url"] = info_urls + self.datasets = df + return self.datasets class DatasetList: @@ -146,7 +150,7 @@ class DatasetList: Attributes: e: an ERDDAP server instance - TODO: search_terms: A list of terms to search the server for. Multiple terms will be combined as AND + TODO: search_terms: A list of terms to search the server for. Multiple terms will be combined as "AND." """ @@ -166,6 +170,3 @@ def get_ids(self): return self.dataset_ids else: raise ValueError(f"The {self.e.server} does not supported this operation.") - # TODO: List the platform_deployment variable - # if self.e.server == "https://erddap.ifremer.fr/erddap": - # platform_deployment From 212dc8c63813520c8c3bd0819c9846990e6c8167 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:21:37 -0300 Subject: [PATCH 02/10] add profile_id to the variables and remove all servers but gliderdac --- gliderpy/servers.py | 42 +----------------------------------------- 1 file changed, 1 insertion(+), 41 deletions(-) diff --git a/gliderpy/servers.py b/gliderpy/servers.py index bc02ee2..412412b 100644 --- a/gliderpy/servers.py +++ b/gliderpy/servers.py @@ -4,17 +4,6 @@ """ -server_alias = { - "National Glider Data Assembly Center": "https://gliders.ioos.us/erddap", - "NGDAC": "https://gliders.ioos.us/erddap", - "IOOS": "https://gliders.ioos.us/erddap", - "Ocean Observatories Initiative": "https://erddap.dataexplorer.oceanobservatories.org/erddap/index.html", - "OOI": "https://erddap.dataexplorer.oceanobservatories.org/erddap/index.html", - "Institut français de recherche pour l'exploitation de la mer": "https://www.ifremer.fr/erddap", - "ifremer": "https://www.ifremer.fr/erddap", - "ifremer.fr": "https://www.ifremer.fr/erddap", -} - server_vars = { "https://gliders.ioos.us/erddap": [ "pressure", @@ -23,14 +12,7 @@ "salinity", "temperature", "time", - ], - "http://www.ifremer.fr/erddap": [ - "time", - "latitude", - "longitude", - "PSAL", - "TEMP", - "PRES", + "profile_id", ], } @@ -48,25 +30,3 @@ "ctdgv_m_glider_instrument_sci_water_pressure_dbar (dbar)": "pressure", "dataset_url": "dataset_url", } - - -def server_select(server_string): - """ - Attempts to match the supplied string to a known ERDDAP server by address or alias - """ - if server_string in server_vars: - # If string matches exactly, return unchanged - return server_string - for server in server_vars: - # If string contains base ERDDAP address, return base ERDDAP address - if server in server_string: - return server - for alias in server_alias: - # If string matches one of the aliases, return the corresponding ERDDAP address - if server_string.lower() == alias.lower(): - return server_alias[alias] - # If the server is not recognised, print options of working servers and exit - raise ValueError( - "Supplied server/alias not recognised. Please use one of the following supported servers:\n" - f"{str(server_vars.keys())[10:-1]}", - ) From 2926fc159bea80c637365eea5faf4d4a9e33ae5a Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:21:50 -0300 Subject: [PATCH 03/10] update pre-commits --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 077656e..59186df 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: check-ast @@ -20,7 +20,7 @@ repos: language_version: python3 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.5.1 + rev: v1.6.0 hooks: - id: mypy exclude: docs/source/conf.py From a01cbfc38d7b87b6aa410579555739a13a917feb Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:22:02 -0300 Subject: [PATCH 04/10] test on py312 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 73379ee..02e1091 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,7 +10,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] os: [windows-latest, ubuntu-latest, macos-latest] fail-fast: false From 62ca9547ea54fd33eadd3cbd8e60bb39ea21a228 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:22:09 -0300 Subject: [PATCH 05/10] remove env file --- environment.yml | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 environment.yml diff --git a/environment.yml b/environment.yml deleted file mode 100644 index b454c45..0000000 --- a/environment.yml +++ /dev/null @@ -1,9 +0,0 @@ -name: gliderpy -channels: - - conda-forge -dependencies: - - pandas - - erddapy - - matplotlib - - cartopy - - palettable From d07a14856f43a04b9f00fef74c910a09915acd79 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:46:07 -0300 Subject: [PATCH 06/10] remove ifremer tests --- tests/test_fetchers.py | 36 ++---------------------------------- 1 file changed, 2 insertions(+), 34 deletions(-) diff --git a/tests/test_fetchers.py b/tests/test_fetchers.py index e068adb..c64a3a4 100644 --- a/tests/test_fetchers.py +++ b/tests/test_fetchers.py @@ -14,9 +14,10 @@ def glider_grab(): def test_variables(glider_grab): expected = [ - "pressure", "latitude", "longitude", + "pressure", + "profile_id", "salinity", "temperature", "time", @@ -24,29 +25,6 @@ def test_variables(glider_grab): assert sorted(glider_grab.fetcher.variables) == sorted(expected) -# As above for ifremer ERDDAP - - -@pytest.fixture -@pytest.mark.web -def glider_grab_ifr(): - g = GliderDataFetcher("http://www.ifremer.fr/erddap") - g.fetcher.dataset_id = "OceanGlidersGDACTrajectories" - yield g - - -def test_variables_ifr(glider_grab_ifr): - expected = [ - "latitude", - "longitude", - "PRES", - "PSAL", - "TEMP", - "time", - ] - assert sorted(glider_grab_ifr.fetcher.variables) == sorted(expected) - - def test_standardise_variables_ioos(): glider_grab = GliderDataFetcher() glider_grab.fetcher.dataset_id = "whoi_406-20160902T1700" @@ -54,13 +32,3 @@ def test_standardise_variables_ioos(): variables = df.columns for var in variables: assert var in server_parameter_rename.values() - - -def test_standardise_variables_ifremer(): - glider_grab = GliderDataFetcher("http://www.ifremer.fr/erddap") - glider_grab.fetcher.dataset_id = "OceanGlidersGDACTrajectories" - glider_grab.query(-90, 90, -180, 180, "2015-09-20", "2015-09-27") - df = glider_grab.to_pandas() - variables = df.columns - for var in variables: - assert var in server_parameter_rename.values() From 6adfaff5385f82ff3c3bf9f4882b469470d64ce9 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 10:46:12 -0300 Subject: [PATCH 07/10] add profile_id --- gliderpy/servers.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/gliderpy/servers.py b/gliderpy/servers.py index 412412b..b33d3de 100644 --- a/gliderpy/servers.py +++ b/gliderpy/servers.py @@ -6,27 +6,28 @@ server_vars = { "https://gliders.ioos.us/erddap": [ - "pressure", "latitude", "longitude", + "pressure", + "profile_id", "salinity", "temperature", "time", - "profile_id", ], } server_parameter_rename = { - "latitude (degrees_north)": "latitude", - "longitude (degrees_east)": "longitude", - "salinity (1)": "salinity", - "psal (psu)": "salinity", "ctdgv_m_glider_instrument_practical_salinity (1)": "salinity", - "temperature (celsius)": "temperature", - "temp (degree_celsius)": "temperature", + "ctdgv_m_glider_instrument_sci_water_pressure_dbar (dbar)": "pressure", "ctdgv_m_glider_instrument_sci_water_temp (deg_c)": "temperature", + "dataset_url": "dataset_url", + "latitude (degrees_north)": "latitude", + "longitude (degrees_east)": "longitude", "pres (decibar)": "pressure", "pressure (dbar)": "pressure", - "ctdgv_m_glider_instrument_sci_water_pressure_dbar (dbar)": "pressure", - "dataset_url": "dataset_url", + "profile_id": "profile_id", + "psal (psu)": "salinity", + "salinity (1)": "salinity", + "temp (degree_celsius)": "temperature", + "temperature (celsius)": "temperature", } From 7bc600b7111c8d3ee94996a1224a564b926c0c87 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 11:16:02 -0300 Subject: [PATCH 08/10] fix fetcher for single and multiple datasets --- gliderpy/fetchers.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/gliderpy/fetchers.py b/gliderpy/fetchers.py index 1725aaf..ab547da 100644 --- a/gliderpy/fetchers.py +++ b/gliderpy/fetchers.py @@ -59,9 +59,14 @@ def to_pandas(self): :return: pandas dataframe with datetime UTC as index """ - if type(self.datasets) is pd.Series: + if self.fetcher.dataset_id: + df = self.fetcher.to_pandas( + index_col="time (UTC)", + parse_dates=True, + ) + elif not self.fetcher.dataset_id and self.datasets is not None: df_all = [] - for dataset_id in self.datasets: + for dataset_id in self.datasets["Dataset ID"]: self.fetcher.dataset_id = dataset_id df = self.fetcher.to_pandas( index_col="time (UTC)", @@ -71,14 +76,11 @@ def to_pandas(self): df = standardise_df(df, dataset_url) df_all.append(df) return pd.concat(df_all) + else: + raise ValueError( + f"Must provide a {self.fetcher.dataset_id} or `query` terms to download data.", + ) - if not self.fetcher.dataset_id: - return None - - df = self.fetcher.to_pandas( - index_col="time (UTC)", - parse_dates=True, - ) # Standardize variable names. dataset_url = self.fetcher.get_download_url().split("?")[0] df = standardise_df(df, dataset_url) From 6c60977035db751debe89cec4f524fa2688765e0 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 11:16:16 -0300 Subject: [PATCH 09/10] simplify docs --- notebooks/00-quick_intro.ipynb | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/notebooks/00-quick_intro.ipynb b/notebooks/00-quick_intro.ipynb index a1e827e..7040ec2 100644 --- a/notebooks/00-quick_intro.ipynb +++ b/notebooks/00-quick_intro.ipynb @@ -43,6 +43,7 @@ " \"longitude\",\n", " \"salinity\",\n", " \"temperature\",\n", + " \"profile_id\",\n", " \"time\",\n", "]\n", "\n", @@ -79,16 +80,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The variable names are standardised by gliderpy, making it easier to fetch from different data sources and comparing the results.\n", + "Much easier, right?\n", + "The variable names are standardized by gliderpy,\n", + "making it easier to fetch from different data sources and comparing the results.\n", "\n", - "`gliderpy` can subset the data on the server side by passing a geographic bounding box and time interval." + "The `gliderpy` libray can subset the data on the server side by passing a geographic bounding box and time interval." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Multiple datasets\n", + "### Querying multiple datasets\n", "\n", "The most common use is to search all datasets for data that falls within the certain space-time bounds." ] @@ -101,7 +104,16 @@ "source": [ "glider_grab = GliderDataFetcher()\n", "\n", - "glider_grab.query(10, 40, -90, 8, \"2010-01-01\", \"2013-06-02\")\n", + "df = glider_grab.query(10, 40, -90, 8, \"2010-01-01\", \"2013-06-02\")\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "df = glider_grab.to_pandas()\n", "df.head()" ] @@ -112,7 +124,7 @@ "source": [ "### Dataset search\n", "\n", - "One can query all dataset_ids available." + "One can query all `dataset_id`s available in the server." ] }, { @@ -146,7 +158,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.6" } }, "nbformat": 4, From 6eac8ad3d3c125966069fcbea80dc04cce643cc1 Mon Sep 17 00:00:00 2001 From: Filipe Fernandes Date: Fri, 13 Oct 2023 11:19:03 -0300 Subject: [PATCH 10/10] fix typo --- notebooks/00-quick_intro.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/00-quick_intro.ipynb b/notebooks/00-quick_intro.ipynb index 7040ec2..3e05290 100644 --- a/notebooks/00-quick_intro.ipynb +++ b/notebooks/00-quick_intro.ipynb @@ -19,7 +19,7 @@ "```\n", "\n", "gliderpy aims to make querying and downloading glider data easier.\n", - "Here is how one would build a query using `erddapy`:" + "Here is how one would build a query using erddapy:" ] }, { @@ -84,7 +84,7 @@ "The variable names are standardized by gliderpy,\n", "making it easier to fetch from different data sources and comparing the results.\n", "\n", - "The `gliderpy` libray can subset the data on the server side by passing a geographic bounding box and time interval." + "The gliderpy library can subset the data on the server side by passing a geographic bounding box and time interval." ] }, { @@ -124,7 +124,7 @@ "source": [ "### Dataset search\n", "\n", - "One can query all `dataset_id`s available in the server." + "One can query all dataset_ids available in the server." ] }, { @@ -138,7 +138,7 @@ "datasets = DatasetList()\n", "ds_ids = datasets.get_ids()\n", "\n", - "print(f\"found {len(ds_ids)} glider datasets on the server {datasets.e.server}\")" + "print(f\"found {len(ds_ids)} glider datasets on the server {datasets.e.server}.\")" ] } ],