From 520a96bea6bdf17ac26ff7f45a8a37c00baa5840 Mon Sep 17 00:00:00 2001 From: Romain Caneill Date: Tue, 30 Apr 2024 16:47:43 +0200 Subject: [PATCH 01/38] Update broken link --- docs/source/example workflow.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/example workflow.rst b/docs/source/example workflow.rst index 3e8f5bd..80b2736 100644 --- a/docs/source/example workflow.rst +++ b/docs/source/example workflow.rst @@ -12,7 +12,7 @@ dataretrieval Query for a GeoJSON from harmonize_wq import wrangle # File for area of interest - aoi_url = r'https://github.com/USEPA/harmonize-wq/raw/master/harmonize_wq/tests/data/PPBays_NCCA.geojson' + aoi_url = r'https://github.com/USEPA/harmonize-wq/raw/main/harmonize_wq/tests/data/PPBays_NCCA.geojson' # Build query query = {'characteristicName': ['Temperature, water', From 601cd7a1dfaf5477a29afbc85016f6b615940259 Mon Sep 17 00:00:00 2001 From: cristinamullin <46969696+cristinamullin@users.noreply.github.com> Date: Thu, 2 May 2024 13:13:27 -0400 Subject: [PATCH 02/38] Update Harmonize_Pensacola.Rmd --- demos/Harmonize_Pensacola.Rmd | 236 +++++++++++++++++++++------------- 1 file changed, 150 insertions(+), 86 deletions(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index faec75c..cf87051 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -1,112 +1,163 @@ --- title: "harmonize-wq in R" +format: html +editor: visual author: "Justin Bousquin, Cristina Mullin, Marc Weber" -date: '2022-08-31' -output: rmarkdown::html_vignette +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true + fig_caption: yes + fig_height: 8 + fig_width: 8 vignette: > + %\VignetteEncoding{UTF-8} %\VignetteIndexEntry{harmonize-wq in R} - %\usepackage[utf8]{inputenc} %\VignetteEngine{knitr::rmarkdown} -editor_options: +editor_options: chunk_output_type: console + markdown: + wrap: 72 --- ```{r setup, include = FALSE} -# Set chunk options +library(knitr) + 
knitr::opts_chunk$set( - collapse = TRUE, - comment = "#>" + echo = TRUE, + warning = FALSE, + message = FALSE ) ``` -
- ## Overview -Standardize, clean, and wrangle Water Quality Portal data into more analytic-ready formats using the harmonize_wq package. US EPA’s Water Quality Portal (WQP) aggregates water quality, biological, and physical data provided by many organizations and has become an essential resource with tools to query and retrieval data using python or R. Given the variety of data and variety of data originators, using the data in analysis often requires data cleaning to ensure it meets the required quality standards and data wrangling to get it in a more analytic-ready format. Recognizing the definition of analysis-ready varies depending on the analysis, the harmonize_wq package is intended to be a flexible water quality specific framework to help: - -* Identify differences in data units (including speciation and basis) -* Identify differences in sampling or analytic methods -* Resolve data errors using transparent assumptions -* Reduce data to the columns that are most commonly needed -* Transform data from long to wide format - -Domain experts must decide what data meets their quality standards for data comparability and any thresholds for acceptance or rejection. - -
- -
+Standardize, clean, and wrangle Water Quality Portal data into more +analytic-ready formats using the harmonize_wq package. US EPA's Water +Quality Portal (WQP) aggregates water quality, biological, and physical +data provided by many organizations and has become an essential resource +with tools to query and retrieval data using python or R. Given the +variety of data and variety of data originators, using the data in +analysis often requires data cleaning to ensure it meets the required +quality standards and data wrangling to get it in a more analytic-ready +format. Recognizing the definition of analysis-ready varies depending on +the analysis, the harmonize_wq package is intended to be a flexible +water quality specific framework to help: + +- Identify differences in data units (including speciation and basis) +- Identify differences in sampling or analytic methods +- Resolve data errors using transparent assumptions +- Reduce data to the columns that are most commonly needed +- Transform data from long to wide format + +Domain experts must decide what data meets their quality standards for +data comparability and any thresholds for acceptance or rejection. ## Installation & Setup -#### Install the harmonize-wq package (Command Line) +#### Option 1: Install the harmonize-wq Package Using the Command Line To install and set up the harmonize-wq package using the command line: -1. If needed, re-install [miniforge](https://github.com/conda-forge/miniforge). Once miniforge is installed. Go to your start menu and open the Miniforge Prompt. -2. At the Miniforge Prompt: - - conda create --name wq_harmonize - - activate wq_harmonize - - conda install geopandas pip dataretrieval pint - - may need to update conda - - conda update -n base -c conda-forge conda - - pip install harmonize-wq - - pip install git+https://github.com/USEPA/harmonize-wq.git (dev version) +1. If needed, re-install + [miniforge](https://github.com/conda-forge/miniforge). 
Once + miniforge is installed. Go to your start menu and open the Miniforge + Prompt. +2. At the Miniforge Prompt, run: + - conda create --name wq_harmonize + - activate wq_harmonize + - conda install geopandas pip dataretrieval pint + - may need to update conda + - conda update -n base -c conda-forge conda + - pip install harmonize-wq + - pip install git+ (dev + version) -
+#### Option 2: Install the harmonize-wq Package Using R -#### Install the harmonize-wq package (R) +**Alternatively**, you may be able to set up your environment and import +the required Python packages using R. -**Alternatively**, you may be able to set up your environment and import the required Python packages using the block of R code below: +First, run the chunk below to install the reticulate package to use Python in R. -```{r, results = 'hide', eval=FALSE} -# If needed, install the reticulate package to use Python in R +```{r, results = 'hide'} install.packages("reticulate") library(reticulate) +``` -# The reticulate package will automatically look for an installation of Conda -# However, you may specify the location if needed using options(reticulate.conda_binary = 'dir') -options(reticulate.conda_binary = '~/AppData/Local/miniforge3/Scripts/conda.exe') +Conda is required to use EPA's harmonize-wq package. -# Create a new Python environment called "wq-reticulate" -# Note that the environment name may need to include the full path (e.g. "~/AppData/Local/miniforge3/envs/wq_harmonize") -conda_create("wq-reticulate") +There are multiple installers available for Conda +(see: ). -# Install the following packages to the newly created environment -conda_install("wq-reticulate", "geopandas") -conda_install("wq-reticulate", "pint") -conda_install("wq-reticulate", "dataretrieval") +One example installer is +[miniforge](https://github.com/conda-forge/miniforge). We use miniforge3 in this +example. -# Install the harmonize-wq package -# This only works with py_install() (pip), which defaults to virtualenvs -# Note that the environment name may need to include the full path (e.g. "~/AppData/Local/miniforge3/envs/wq_harmonize") -py_install("harmonize-wq", pip = TRUE, envname = "wq-reticulate") +Once miniforge3 (or another installer of your choice) is installed, the +reticulate package will automatically look for the installation of Conda (conda.exe) +on your computer. 
-# To install the dev version of harmonize-wq from GitHub -# Note that the environment name may need to include the full path (e.g. "~/AppData/Local/miniforge3/envs/wq_harmonize") -py_install("git+https://github.com/USEPA/harmonize-wq.git@new_release_0-3-8", pip = TRUE, envname = "wq-reticulate") +```{r, results = 'hide'} +# options(reticulate.conda_binary = 'dir') +``` -# Specify the Python environment to be used -use_condaenv("wq_harmonize") +However, you may still need to specify the location. If needed, update the code chuck below to specify the location of conda.exe on your computer. -# Test that your Python environment is correctly set up -# Both imports should return "Module(package_name)" -import("harmonize_wq") -import("dataretrieval") +```{r, results = 'hide'} +# update the 'dir' in this chuck to specify the location of conda.exe on your computer +# Note that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/Scripts/conda.exe") +options(reticulate.conda_binary = "C:/Users/CMULLI01/AppData/Local/miniforge3/Scripts/conda.exe") ``` -
+Next, update the code chunk below to create a new Python environment in the envs +folder on your computer called "wq_harmonize". + +```{r, results = 'hide'} +# Note that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/envs/wq_harmonize") +reticulate::conda_create("C:/Users/CMULLI01/AppData/Local/miniforge3/envs/wq_harmonize") +``` -#### Import required libraries +Install the following python and R packages to the newly created +Python environment called "wq_harmonize". -The full list of dependencies that should be installed to use the harmonize-wq package can be found in [`requirements.txt`](https://github.com/USEPA/harmonize-wq/blob/new_release_0-3-8/requirements.txt). **Note that `reticulate::repl_python()` must be called to execute these commands using the reticulate package in R.** +```{r, results = 'hide'} +reticulate::conda_install("wq_harmonize", "geopandas") # Python package +reticulate::conda_install("wq_harmonize", "pint") # Python package +reticulate::conda_install("wq_harmonize", "dataretrieval") # R package +``` + +Install EPA's harmonize-wq package. + +```{r, results = 'hide'} +# Install the most recent release of the harmonize-wq package +# This only works with py_install() (pip = TRUE), which defaults to use virtualenvs +reticulate::py_install("harmonize-wq", pip = TRUE, envname = "wq_harmonize") + +# Uncomment below to install the development version of harmonize-wq from GitHub instead (optional) +# py_install("git+https://github.com/USEPA/harmonize-wq.git@new_release_0-3-8", pip = TRUE, envname = "wq_harmonize") +``` + +Specify the Python environment to be used, "wq_harmonize", and test that your Python +environment is set up correctly. 
```{r} -# Use reticulate to execute python commands -reticulate::repl_python() +# Specify environment to be used +reticulate::use_condaenv("wq_harmonize") + +# Test set up is correct +# Both imports should return "Module(package_name)" +reticulate::import("harmonize_wq") +reticulate::import("dataretrieval") ``` -```{python} +#### Import additional required libraries + +The full list of dependencies that should be installed to use the +harmonize-wq package can be found in +[`requirements.txt`](https://github.com/USEPA/harmonize-wq/blob/new_release_0-3-8/requirements.txt). + +```{python, results = 'hide'} # Use these reticulate imports to test the modules are installed import harmonize_wq import dataretrieval @@ -114,6 +165,8 @@ import os import pandas import geopandas import dataretrieval.wqp as wqp +import pint +import mapclassify from harmonize_wq import harmonize from harmonize_wq import convert from harmonize_wq import wrangle @@ -122,24 +175,30 @@ from harmonize_wq import location from harmonize_wq import visualize ``` -
+## harmonize-wq Usage: FL Bays Example -
+The following example illustrates a typical harmonization process using +the harmonize-wq package on WQP data retrieved from Perdido and +Pensacola Bays, FL. -## Usage +**Note that `reticulate::repl_python()` must be called first to execute +these commands using the reticulate package in R.** -The following example illustrates a typical harmonization process using the harmonize-wq package on WQP data retrieved from Perdido and Pensacola Bays, FL. +```{r, results = 'hide'} +# Use reticulate to execute python commands +reticulate::repl_python() +``` -First, determine an area of interest (AOI), build a query, and retrieve water temperature and Secchi disk depth data from WQP for the AOI using the dataretrieval package: +First, determine an area of interest (AOI), build a query, and retrieve +water temperature and Secchi disk depth data from the Water Quality Portal (WQP) +for the AOI using the dataretrieval package: -```{python, message=FALSE, warning=FALSE, error=FALSE} +```{python, error = F} # File for area of interest (Pensacola and Perdido Bays, FL) aoi_url = r'https://raw.githubusercontent.com/USEPA/harmonize-wq/main/harmonize_wq/tests/data/PPBays_NCCA.geojson' # Build query and get WQP data with dataretrieval -query = {'characteristicName': ['Temperature, water', - 'Depth, Secchi disk depth', - ]} +query = {'characteristicName': ['Temperature, water', 'Depth, Secchi disk depth',]} # Use harmonize-wq to wrangle query['bBox'] = wrangle.get_bounding_box(aoi_url) @@ -152,10 +211,14 @@ res_narrow, md_narrow = wqp.get_results(**query) res_narrow ``` -Next, harmonize and clean all results: +Next, harmonize and clean all results using the harmonize.harmonize_all, +clean.datetime, and clean.harmonize_depth functions. -```{python, message=FALSE, warning=FALSE, error=FALSE} -df_harmonized = harmonize.harmonize_all(res_narrow, errors='raise') +Enter a ? followed by the function name, for example ?harmonize.harmonize_all, +into the console for more details. 
+ +```{python, error = F} +df_harmonized = harmonize.harmonize_all(res_narrow, errors = 'raise') df_harmonized # Clean up the datetime and sample depth columns @@ -164,9 +227,14 @@ df_cleaned = clean.harmonize_depth(df_cleaned) df_cleaned ``` -There are many columns in the data frame that are characteristic specific, that is they have different values for the same sample depending on the characteristic. To ensure one result for each sample after the transformation of the data, these columns must either be split, generating a new column for each characteristic with values, or moved out from the table if not being used. +There are many columns in the data frame that are characteristic +specific, that is they have different values for the same sample +depending on the characteristic. To ensure one result for each sample +after the transformation of the data, these columns must either be +split, generating a new column for each characteristic with values, or +removed from the table if not needed. -```{python, message=FALSE, warning=FALSE, error=FALSE} +```{python, error = F} # Split the QA_flag column into multiple characteristic specific QA columns df_full = wrangle.split_col(df_cleaned) @@ -183,15 +251,11 @@ df_wide.head() Finally, the cleaned and wrangled data may be visualized as a map: -```{python, message=FALSE, warning=FALSE, error=FALSE} +```{python, error = F} # Get harmonized stations clipped to the AOI stations_gdf, stations, site_md = location.get_harmonized_stations(query, aoi=aoi_url) # Map average temperature results at each station gdf_temperature = visualize.map_measure(df_wide, stations_gdf, 'Temperature') -gdf_temperature.plot(column='mean', cmap='OrRd', legend=True) +gdf_temperature.plot(column = 'mean', cmap = 'OrRd', legend = True) ``` - -
- -
\ No newline at end of file From d27520fed8e282ea9c234e8b6c21e5a5b42b23a1 Mon Sep 17 00:00:00 2001 From: cristinamullin <46969696+cristinamullin@users.noreply.github.com> Date: Thu, 2 May 2024 13:14:48 -0400 Subject: [PATCH 03/38] remove full path --- demos/Harmonize_Pensacola.Rmd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index cf87051..b33d16e 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -107,7 +107,7 @@ However, you may still need to specify the location. If needed, update the code ```{r, results = 'hide'} # update the 'dir' in this chuck to specify the location of conda.exe on your computer # Note that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/Scripts/conda.exe") -options(reticulate.conda_binary = "C:/Users/CMULLI01/AppData/Local/miniforge3/Scripts/conda.exe") +options(reticulate.conda_binary = "~/AppData/Local/miniforge3/Scripts/conda.exe") ``` Next, update the code chunk below to create a new Python environment in the envs @@ -115,7 +115,7 @@ folder on your computer called "wq_harmonize". ```{r, results = 'hide'} # Note that the environment name may need to include the full path (e.g. 
"C:/Users/USERNAME/AppData/Local/miniforge3/envs/wq_harmonize") -reticulate::conda_create("C:/Users/CMULLI01/AppData/Local/miniforge3/envs/wq_harmonize") +reticulate::conda_create("~/AppData/Local/miniforge3/envs/wq_harmonize") ``` Install the following python and R packages to the newly created From 7446dfc7430f46ee3d6dee3664f481d6f3b3c6c4 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 09:02:49 -0500 Subject: [PATCH 04/38] Update requirements.txt Be more specific about lowest version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6755148..32c2267 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pandas<2.0 geopandas>=0.10.2, <0.13 # Has additional dependencies (pandas, numpy, shapely, fiona, etc.), tested on 0.10.2 pint>=0.18 # May work on earlier versions but this is what it was tested on -dataretrieval>=1.*, <1.0.5 # 1.0.5 will break datetime handling, <1.0 doesn't have required functions +dataretrieval>=1.0, <1.0.5 # 1.0.5 will break datetime handling, <1.0 doesn't have required functions descartes>=1.1.0 # May be required for mapping in some jupyter notebooks mapclassify>=2.4.0 # May be required for mapping in some jupyter notebooks requests From bfbe94192c197051c390fec5b7fe6c90b81f4a92 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 09:10:34 -0500 Subject: [PATCH 05/38] Update requirements.txt pin older version (32 vs 64 changes for dtype) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 32c2267..6bfc881 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +numpy<2.0 pandas<2.0 geopandas>=0.10.2, <0.13 # Has additional dependencies (pandas, numpy, shapely, fiona, etc.), tested on 0.10.2 pint>=0.18 # May work on earlier versions but this is what it was tested on @@ -5,4 +6,3 @@ dataretrieval>=1.0, <1.0.5 # 1.0.5 will break 
datetime handling, <1.0 doesn't h descartes>=1.1.0 # May be required for mapping in some jupyter notebooks mapclassify>=2.4.0 # May be required for mapping in some jupyter notebooks requests -numpy From 76c3a847b3173e512f688028e7aa523ff43cd327 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 09:31:21 -0500 Subject: [PATCH 06/38] Update domains.py Interesting new domain was added... --- harmonize_wq/domains.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/harmonize_wq/domains.py b/harmonize_wq/domains.py index 85a258b..eb00304 100644 --- a/harmonize_wq/domains.py +++ b/harmonize_wq/domains.py @@ -98,7 +98,7 @@ def get_domain_dict(table, cols=None): 'Extractable, organic-bnd', 'Extractable, other', 'Extractable, oxide-bound', 'Extractable, residual', 'Field', 'Filter/sieve residue', 'Filterable', 'Filtered field and/or lab', 'Filtered, field', 'Filtered, lab', - 'Fixed', 'Free Available', 'Inorganic', 'Leachable', + 'Fixed', 'Free Available', 'Inorganic', 'Leachable', 'Net (Hot)', 'Non-Filterable (Particle)', 'Non-settleable', 'Non-volatile', 'None', 'Organic', 'Pot. Dissolved', 'Semivolatile', 'Settleable', 'Sieved', 'Strong Acid Diss', 'Supernate', 'Suspended', 'Total', @@ -961,4 +961,4 @@ def characteristic_cols(category=None): {"Source": "USDOI/USGS", "Method": "I-6600-88"}, {"Source": "ASTM", "Method": "D515"}, ], - } \ No newline at end of file + } From ceef5c32f757a98d92a99f442c6cb07a54748643 Mon Sep 17 00:00:00 2001 From: Bousquin Date: Tue, 2 Jul 2024 15:23:40 -0500 Subject: [PATCH 07/38] Minor edits to text --- demos/Harmonize_Pensacola.Rmd | 75 ++++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 33 deletions(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index b33d16e..bf1b42f 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -55,6 +55,16 @@ data comparability and any thresholds for acceptance or rejection. 
## Installation & Setup +An installation of Conda is required to run EPA's harmonize-wq package +using R/reticulate. + +There are multiple installers available for Conda (see: +). + +One example installer is +[miniforge](https://github.com/conda-forge/miniforge). We use miniforge3 +in these examples. + #### Option 1: Install the harmonize-wq Package Using the Command Line To install and set up the harmonize-wq package using the command line: @@ -64,45 +74,44 @@ To install and set up the harmonize-wq package using the command line: miniforge is installed. Go to your start menu and open the Miniforge Prompt. 2. At the Miniforge Prompt, run: + - to update conda: + + > conda update -n base -c conda-forge conda + - conda create --name wq_harmonize + - activate wq_harmonize + - conda install geopandas pip dataretrieval pint - - may need to update conda - - conda update -n base -c conda-forge conda - - pip install harmonize-wq - - pip install git+ (dev - version) + + - pip install harmonize-wq (dev-version shown): + + > pip install git+ + #### Option 2: Install the harmonize-wq Package Using R **Alternatively**, you may be able to set up your environment and import the required Python packages using R. -First, run the chunk below to install the reticulate package to use Python in R. +First, run the chunk below to install the reticulate package to use +Python in R. ```{r, results = 'hide'} install.packages("reticulate") library(reticulate) ``` -Conda is required to use EPA's harmonize-wq package. - -There are multiple installers available for Conda -(see: ). - -One example installer is -[miniforge](https://github.com/conda-forge/miniforge). We use miniforge3 in this -example. - -Once miniforge3 (or another installer of your choice) is installed, the -reticulate package will automatically look for the installation of Conda (conda.exe) -on your computer. 
+Once miniforge3 (or another Conda installer of your choice) is +installed, the reticulate package will automatically look for the +installation of Conda (conda.exe) on your computer. ```{r, results = 'hide'} # options(reticulate.conda_binary = 'dir') ``` -However, you may still need to specify the location. If needed, update the code chuck below to specify the location of conda.exe on your computer. +However, you may still need to specify the conda.exe location. +To do so, update the code below to specify your conda.exe location ```{r, results = 'hide'} # update the 'dir' in this chuck to specify the location of conda.exe on your computer @@ -110,21 +119,21 @@ However, you may still need to specify the location. If needed, update the code options(reticulate.conda_binary = "~/AppData/Local/miniforge3/Scripts/conda.exe") ``` -Next, update the code chunk below to create a new Python environment in the envs -folder on your computer called "wq_harmonize". +Next, update the code chunk below to create a new Python environment in +the envs folder called "wq_harmonize". ```{r, results = 'hide'} # Note that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/envs/wq_harmonize") reticulate::conda_create("~/AppData/Local/miniforge3/envs/wq_harmonize") ``` -Install the following python and R packages to the newly created -Python environment called "wq_harmonize". +Install the following python packages to the newly created Python +environment called "wq_harmonize". ```{r, results = 'hide'} -reticulate::conda_install("wq_harmonize", "geopandas") # Python package -reticulate::conda_install("wq_harmonize", "pint") # Python package -reticulate::conda_install("wq_harmonize", "dataretrieval") # R package +reticulate::conda_install("wq_harmonize", "geopandas") +reticulate::conda_install("wq_harmonize", "pint") +reticulate::conda_install("wq_harmonize", "dataretrieval") ``` Install EPA's harmonize-wq package. 
@@ -138,8 +147,8 @@ reticulate::py_install("harmonize-wq", pip = TRUE, envname = "wq_harmonize") # py_install("git+https://github.com/USEPA/harmonize-wq.git@new_release_0-3-8", pip = TRUE, envname = "wq_harmonize") ``` -Specify the Python environment to be used, "wq_harmonize", and test that your Python -environment is set up correctly. +Specify the Python environment to be used, "wq_harmonize", and test that +your Python environment is set up correctly. ```{r} # Specify environment to be used @@ -190,8 +199,8 @@ reticulate::repl_python() ``` First, determine an area of interest (AOI), build a query, and retrieve -water temperature and Secchi disk depth data from the Water Quality Portal (WQP) -for the AOI using the dataretrieval package: +water temperature and Secchi disk depth data from the Water Quality +Portal (WQP) for the AOI using the dataretrieval package: ```{python, error = F} # File for area of interest (Pensacola and Perdido Bays, FL) @@ -211,11 +220,11 @@ res_narrow, md_narrow = wqp.get_results(**query) res_narrow ``` -Next, harmonize and clean all results using the harmonize.harmonize_all, +Next, harmonize and clean all results using the harmonize.harmonize_all, clean.datetime, and clean.harmonize_depth functions. -Enter a ? followed by the function name, for example ?harmonize.harmonize_all, -into the console for more details. +Enter a ? followed by the function name, for example +?harmonize.harmonize_all, into the console for more details. 
```{python, error = F} df_harmonized = harmonize.harmonize_all(res_narrow, errors = 'raise') From 655c4bab0c4f5154460b18d49dee2288d2225b7a Mon Sep 17 00:00:00 2001 From: Bousquin Date: Tue, 2 Jul 2024 15:24:55 -0500 Subject: [PATCH 08/38] Fix plot --- demos/Harmonize_Pensacola.Rmd | 3 +++ 1 file changed, 3 insertions(+) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index bf1b42f..3688f8c 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -176,6 +176,7 @@ import geopandas import dataretrieval.wqp as wqp import pint import mapclassify +import matplotlib.pyplot as plt from harmonize_wq import harmonize from harmonize_wq import convert from harmonize_wq import wrangle @@ -266,5 +267,7 @@ stations_gdf, stations, site_md = location.get_harmonized_stations(query, aoi=ao # Map average temperature results at each station gdf_temperature = visualize.map_measure(df_wide, stations_gdf, 'Temperature') +plt.figure() gdf_temperature.plot(column = 'mean', cmap = 'OrRd', legend = True) +plt.show() ``` From f345018aaa758cbb4f5921c810200a5da8ab2bb6 Mon Sep 17 00:00:00 2001 From: Bousquin Date: Tue, 2 Jul 2024 15:25:40 -0500 Subject: [PATCH 09/38] Install from deve version not pinned release (one less thing to update) --- demos/Harmonize_Pensacola.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index 3688f8c..3872371 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -144,7 +144,7 @@ Install EPA's harmonize-wq package. 
reticulate::py_install("harmonize-wq", pip = TRUE, envname = "wq_harmonize") # Uncomment below to install the development version of harmonize-wq from GitHub instead (optional) -# py_install("git+https://github.com/USEPA/harmonize-wq.git@new_release_0-3-8", pip = TRUE, envname = "wq_harmonize") +# py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") ``` Specify the Python environment to be used, "wq_harmonize", and test that From c3f494408ec662cefaef9d63b73087b787e5900c Mon Sep 17 00:00:00 2001 From: Bousquin Date: Tue, 2 Jul 2024 15:26:20 -0500 Subject: [PATCH 10/38] Resolve first error when knit locally --- demos/Harmonize_Pensacola.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index 3872371..397bdf3 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -98,7 +98,7 @@ First, run the chunk below to install the reticulate package to use Python in R. ```{r, results = 'hide'} -install.packages("reticulate") +install.packages("reticulate", repos = "http://cran.us.r-project.org") library(reticulate) ``` From 6a6e832b70de99dae6bada24e80cfa4252948fca Mon Sep 17 00:00:00 2001 From: Bousquin Date: Tue, 2 Jul 2024 16:22:59 -0500 Subject: [PATCH 11/38] Comment out some of the option 2 code so it doesn't run when knit-ing. Open to better options of how to format it as code but not always run it.. 
--- demos/Harmonize_Pensacola.Rmd | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index 397bdf3..af08443 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -88,7 +88,6 @@ To install and set up the harmonize-wq package using the command line: > pip install git+ - #### Option 2: Install the harmonize-wq Package Using R **Alternatively**, you may be able to set up your environment and import @@ -110,41 +109,43 @@ installation of Conda (conda.exe) on your computer. # options(reticulate.conda_binary = 'dir') ``` -However, you may still need to specify the conda.exe location. -To do so, update the code below to specify your conda.exe location +However, you may still need to specify the conda.exe location. To do so, +update the last line of code below to specify your conda.exe location ```{r, results = 'hide'} # update the 'dir' in this chuck to specify the location of conda.exe on your computer -# Note that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/Scripts/conda.exe") -options(reticulate.conda_binary = "~/AppData/Local/miniforge3/Scripts/conda.exe") +# Note: that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/Scripts/conda.exe") +# options(reticulate.conda_binary = "~/AppData/Local/miniforge3/Scripts/conda.exe") ``` Next, update the code chunk below to create a new Python environment in -the envs folder called "wq_harmonize". +the envs folder called "wq_harmonize". Note that the environment name +may need to include the full path (e.g. +"C:/Users/USERNAME/AppData/Local/miniforge3/envs/wq_harmonize") ```{r, results = 'hide'} -# Note that the environment name may need to include the full path (e.g. 
"C:/Users/USERNAME/AppData/Local/miniforge3/envs/wq_harmonize") -reticulate::conda_create("~/AppData/Local/miniforge3/envs/wq_harmonize") +# reticulate::conda_create("~/AppData/Local/miniforge3/envs/wq_harmonize") ``` Install the following python packages to the newly created Python environment called "wq_harmonize". ```{r, results = 'hide'} -reticulate::conda_install("wq_harmonize", "geopandas") -reticulate::conda_install("wq_harmonize", "pint") -reticulate::conda_install("wq_harmonize", "dataretrieval") +#reticulate::conda_install("wq_harmonize", "geopandas") +#reticulate::conda_install("wq_harmonize", "pint") +#reticulate::conda_install("wq_harmonize", "dataretrieval") ``` -Install EPA's harmonize-wq package. +Uncomment to install EPA's harmonize-wq package most recent release or +development version. ```{r, results = 'hide'} # Install the most recent release of the harmonize-wq package # This only works with py_install() (pip = TRUE), which defaults to use virtualenvs -reticulate::py_install("harmonize-wq", pip = TRUE, envname = "wq_harmonize") +# reticulate::py_install("harmonize-wq", pip = TRUE, envname = "wq_harmonize") -# Uncomment below to install the development version of harmonize-wq from GitHub instead (optional) -# py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") +# Install the development version of harmonize-wq from GitHub (optional) +#py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") ``` Specify the Python environment to be used, "wq_harmonize", and test that From c72f0691a9e502b8b8f1e26374f202bb4cff8f48 Mon Sep 17 00:00:00 2001 From: Bousquin Date: Tue, 2 Jul 2024 16:25:00 -0500 Subject: [PATCH 12/38] First attempt to render the rmd in R (i.e. 
run the python code in R vignette) --- .github/workflows/test_r.yaml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 708c92a..6dfcc4a 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -38,7 +38,9 @@ jobs: uses: r-lib/actions/setup-r@v2 with: r-version: 'release' - + + - uses: r-lib/actions/setup-renv@v2 + - uses: r-lib/actions/setup-pandoc@v2 - name: Update pip and install testing pkgs @@ -60,9 +62,14 @@ jobs: - name: Run pip env using R reticulate run: | - install.packages("reticulate") + install.packages(c("knitr", "rmarkdown", "reticulate"))' reticulate::import("harmonize_wq") shell: Rscript {0} + - name: Render-rmd + run: | + echo "Rendering the page..." + Rscript -e 'rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd")' + - name: Run pytest run: pytest -v harmonize_wq From 15cd379014ca1d3f7ca0845f7e49d4bc8f5a85ad Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 16:35:47 -0500 Subject: [PATCH 13/38] Update test_r.yaml Try without renv, fixed typo. 
--- .github/workflows/test_r.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 6dfcc4a..e5f9c4d 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -39,7 +39,7 @@ jobs: with: r-version: 'release' - - uses: r-lib/actions/setup-renv@v2 + #- uses: r-lib/actions/setup-renv@v2 - uses: r-lib/actions/setup-pandoc@v2 @@ -62,7 +62,7 @@ jobs: - name: Run pip env using R reticulate run: | - install.packages(c("knitr", "rmarkdown", "reticulate"))' + install.packages(c("knitr", "rmarkdown", "reticulate")) reticulate::import("harmonize_wq") shell: Rscript {0} From de53490c6d8ac84abbe3575b9f6240a9249aee26 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 17:01:33 -0500 Subject: [PATCH 14/38] Update test_r.yaml Switch from pip to conda (required of reticulate) --- .github/workflows/test_r.yaml | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index e5f9c4d..4eb5990 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -29,10 +29,13 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - name: Install/set-up conda + uses: conda-incubator/setup-miniconda@v3 with: + auto-update-conda: true python-version: ${{ matrix.python-version }} + auto-activate-base: true + activate-environment: "" - name: Set up R uses: r-lib/actions/setup-r@v2 @@ -42,25 +45,18 @@ jobs: #- uses: r-lib/actions/setup-renv@v2 - uses: r-lib/actions/setup-pandoc@v2 - - - name: Update pip and install testing pkgs - run: | - python -VV - python -m pip install --upgrade pip - pip install pytest - - # fiona doesn't have wheels for windows - - if: matrix.os == 'windows-latest' - run: | - pip install 
https://github.com/cgohlke/geospatial-wheels/releases/download/v2023.7.16/GDAL-3.7.1-cp39-cp39-win_amd64.whl - pip install https://github.com/cgohlke/geospatial-wheels/releases/download/v2023.7.16/Fiona-1.9.4.post1-cp39-cp39-win_amd64.whl - - - name: Install package and dependencies + + - name: Install testing pkgs run: | - python -m pip install --no-deps . - pip install -r requirements.txt + conda install pytest + conda install geopandas>=0.10.2, <0.13 + conda install numpy<2.0 + conda install pandas<2.0 + conda install pint>=0.18 + conda install dataretrieval>=1.0, <1.0.5 + conda install requests - - name: Run pip env using R reticulate + - name: Run env using R reticulate run: | install.packages(c("knitr", "rmarkdown", "reticulate")) reticulate::import("harmonize_wq") From 4c6b60abf5704ffe68a8f4487c1d9e286dd8e900 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 17:27:02 -0500 Subject: [PATCH 15/38] Update test_r.yaml Try setting up env using reticulate instead of conda (since conda-incubator is currently restricted) --- .github/workflows/test_r.yaml | 38 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 4eb5990..3418617 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -28,14 +28,11 @@ jobs: python-version: "3.9" steps: - uses: actions/checkout@v3 - - - name: Install/set-up conda - uses: conda-incubator/setup-miniconda@v3 + + - name: setup python + uses: actions/setup-python@v4 with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - auto-activate-base: true - activate-environment: "" + python-version: ${{ matrix.config.python }} - name: Set up R uses: r-lib/actions/setup-r@v2 @@ -45,22 +42,25 @@ jobs: #- uses: r-lib/actions/setup-renv@v2 - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install testing pkgs - run: | - conda install pytest - conda install 
geopandas>=0.10.2, <0.13 - conda install numpy<2.0 - conda install pandas<2.0 - conda install pint>=0.18 - conda install dataretrieval>=1.0, <1.0.5 - conda install requests - - name: Run env using R reticulate + - name: R depends + shell: Rscript {0} run: | install.packages(c("knitr", "rmarkdown", "reticulate")) - reticulate::import("harmonize_wq") + + - name: setup r-reticulate venv shell: Rscript {0} + run: | + library(reticulate) + + path_to_venv <- virtualenv_create( + envname = "r-reticulate", + python = Sys.which("python"), # placed on PATH by the setup-python action + packages = c( + "pytest", "numpy<2.0", "pandas<2.0", "geopandas>=0.10.2, <0.13", "pint>=0.18", + "dataretrieval>=1.0, <1.0.5", "requests" + ) + ) - name: Render-rmd run: | From 2c23f0fdb89587dc3ee4b6781bba3dc8d6fae2da Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 17:35:21 -0500 Subject: [PATCH 16/38] Update test_r.yaml Combine install/run so env is not lost --- .github/workflows/test_r.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 3418617..745e68d 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -48,7 +48,7 @@ jobs: run: | install.packages(c("knitr", "rmarkdown", "reticulate")) - - name: setup r-reticulate venv + - name: setup r-reticulate venv & render rmd shell: Rscript {0} run: | library(reticulate) @@ -61,11 +61,8 @@ jobs: "dataretrieval>=1.0, <1.0.5", "requests" ) ) - - - name: Render-rmd - run: | - echo "Rendering the page..." 
- Rscript -e 'rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd")' + + rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd")' - name: Run pytest run: pytest -v harmonize_wq From 821e90b32d8d9b40ee57e6dbe45860d11fa9cef5 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 17:45:48 -0500 Subject: [PATCH 17/38] Update test_r.yaml update env name --- .github/workflows/test_r.yaml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 745e68d..0e238ae 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -48,21 +48,24 @@ jobs: run: | install.packages(c("knitr", "rmarkdown", "reticulate")) - - name: setup r-reticulate venv & render rmd + - name: setup r-reticulate venv shell: Rscript {0} run: | library(reticulate) path_to_venv <- virtualenv_create( - envname = "r-reticulate", + envname = "wq-harmonize", python = Sys.which("python"), # placed on PATH by the setup-python action packages = c( "pytest", "numpy<2.0", "pandas<2.0", "geopandas>=0.10.2, <0.13", "pint>=0.18", "dataretrieval>=1.0, <1.0.5", "requests" ) ) - - rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd")' + + - name: Render-rmd + run: | + echo "Rendering the page..." 
+ Rscript -e 'rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd")' - name: Run pytest run: pytest -v harmonize_wq From aedb45065c5d2257febfb9df0faa1cc2e0720aef Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 17:50:47 -0500 Subject: [PATCH 18/38] Update test_r.yaml Combine again without typo --- .github/workflows/test_r.yaml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 0e238ae..a7a319c 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -48,7 +48,7 @@ jobs: run: | install.packages(c("knitr", "rmarkdown", "reticulate")) - - name: setup r-reticulate venv + - name: setup r-reticulate venv & render rmd shell: Rscript {0} run: | library(reticulate) @@ -61,11 +61,8 @@ jobs: "dataretrieval>=1.0, <1.0.5", "requests" ) ) - - - name: Render-rmd - run: | - echo "Rendering the page..." - Rscript -e 'rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd")' + + rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd") - name: Run pytest run: pytest -v harmonize_wq From 5f91eb5427ee7533b44b849339035b389dcd819a Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 18:05:17 -0500 Subject: [PATCH 19/38] Update test_r.yaml use miniconda_install() (note before I was setting up venv) --- .github/workflows/test_r.yaml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index a7a319c..1ae0175 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -53,14 +53,21 @@ jobs: run: | library(reticulate) - path_to_venv <- virtualenv_create( - envname = "wq-harmonize", - python = Sys.which("python"), # placed on PATH by the setup-python action - packages = c( + packages = c( "pytest", "numpy<2.0", "pandas<2.0", "geopandas>=0.10.2, <0.13", "pint>=0.18", "dataretrieval>=1.0, <1.0.5", "requests" - ) - ) + ) 
+ + reticulate::install_miniconda() + reticulate::conda_create("wq_harmonize") + reticulate::conda_install("wq_harmonize", packages) + + #path_to_venv <- virtualenv_create( + # envname = "wq_harmonize", + # python = Sys.which("python"), # placed on PATH by the setup-python action + # packages + #) + #use_virtualenv("wq_harmonize") rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd") From 9295259bd7e0c75e1c4755b750f3e73ce7c2b7d2 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 18:13:46 -0500 Subject: [PATCH 20/38] Update test_r.yaml install dev package from repo... for tests this should use the currently checked out instead, but just trying to get it working. --- .github/workflows/test_r.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 1ae0175..e105c8a 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -68,6 +68,8 @@ jobs: # packages #) #use_virtualenv("wq_harmonize") + + reticulate::py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd") From a2d590cccd9af42436941ccb6f7872320dd16903 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 18:35:02 -0500 Subject: [PATCH 21/38] Update test_r.yaml Not sure why now it was case sensitive... 
--- .github/workflows/test_r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index e105c8a..e397319 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -71,7 +71,7 @@ jobs: reticulate::py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") - rmarkdown::render(input = "demos/harmonize_Pensacola.Rmd") + rmarkdown::render(input = "demos/Harmonize_Pensacola.Rmd") - name: Run pytest run: pytest -v harmonize_wq From 78dad2c888c6c47e3ee2df2e09ec743cce28113c Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 2 Jul 2024 20:52:35 -0500 Subject: [PATCH 22/38] Update test_r.yaml pytest already check tests in test.yml, no need to run it a second time here. --- .github/workflows/test_r.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index e397319..02ad3f2 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -72,6 +72,3 @@ jobs: reticulate::py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") rmarkdown::render(input = "demos/Harmonize_Pensacola.Rmd") - - - name: Run pytest - run: pytest -v harmonize_wq From b104c36aa3d4d4ddddc8bf7270a5f42b4c178c69 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 09:46:06 -0500 Subject: [PATCH 23/38] Update test_r.yaml Skip unbutu, run all windows (conda) --- .github/workflows/test_r.yaml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 02ad3f2..adb0636 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -21,11 +21,8 @@ jobs: strategy: matrix: - os: [ubuntu-latest, macos-latest] + os: [windows-latest, macos-latest] python-version: ['3.8', '3.9', '3.10', '3.11'] - include: - - os: windows-latest - 
python-version: "3.9" steps: - uses: actions/checkout@v3 From 0f16f2d794f220adeeaafba97c7f230d72a2afa7 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 10:46:04 -0500 Subject: [PATCH 24/38] Update test_r.yaml Trying on ubuntu again --- .github/workflows/test_r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index adb0636..dc6a719 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -21,7 +21,7 @@ jobs: strategy: matrix: - os: [windows-latest, macos-latest] + os: [windows-latest, macos-latest, ubuntu-latest] python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 From d7661c0b7021b0f4f30d41a1d28d834bef61387c Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 11:47:28 -0500 Subject: [PATCH 25/38] Update test_r.yaml Try on older ubuntu version. --- .github/workflows/test_r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index dc6a719..0568a01 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -21,7 +21,7 @@ jobs: strategy: matrix: - os: [windows-latest, macos-latest, ubuntu-latest] + os: [windows-latest, macos-latest, ubuntu-20.04.2] python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v3 From 33165300bf9114cbb7da894325e8ef1a44037564 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 11:54:09 -0500 Subject: [PATCH 26/38] Update test_r.yaml bump action version up (getting warnings) --- .github/workflows/test_r.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 0568a01..5a1e60b 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -24,10 +24,10 @@ jobs: os: [windows-latest, macos-latest, ubuntu-20.04.2] python-version: 
['3.8', '3.9', '3.10', '3.11'] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: setup python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.config.python }} From 522e99813165bc5091e341d33ba98a0800f37b80 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 12:30:12 -0500 Subject: [PATCH 27/38] Update test_r.yaml Drop Ubuntu. Fix where python version is set from the matrix. Conda create with python version from matrix. --- .github/workflows/test_r.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 5a1e60b..3ae1df4 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -21,15 +21,15 @@ jobs: strategy: matrix: - os: [windows-latest, macos-latest, ubuntu-20.04.2] + os: [windows-latest, macos-latest] python-version: ['3.8', '3.9', '3.10', '3.11'] steps: - uses: actions/checkout@v4 - - name: setup python + - name: setup python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: - python-version: ${{ matrix.config.python }} + python-version: ${{ matrix.python-version }} - name: Set up R uses: r-lib/actions/setup-r@v2 @@ -56,7 +56,7 @@ jobs: ) reticulate::install_miniconda() - reticulate::conda_create("wq_harmonize") + reticulate::conda_create("wq_harmonize", python_version=${{ matrix.python-version }}) reticulate::conda_install("wq_harmonize", packages) #path_to_venv <- virtualenv_create( From 7ce2925f83d3dbd25c396d4dd94bb0d06243a185 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 12:45:33 -0500 Subject: [PATCH 28/38] Update test_r.yaml Try setting var from matrix using with since it seemed to cut out 3.1* for 3.10. 
--- .github/workflows/test_r.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 3ae1df4..da89144 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -47,6 +47,8 @@ jobs: - name: setup r-reticulate venv & render rmd shell: Rscript {0} + with: + python-version: ${{ matrix.python-version }} run: | library(reticulate) @@ -56,7 +58,7 @@ jobs: ) reticulate::install_miniconda() - reticulate::conda_create("wq_harmonize", python_version=${{ matrix.python-version }}) + reticulate::conda_create("wq_harmonize", python_version=python-version) reticulate::conda_install("wq_harmonize", packages) #path_to_venv <- virtualenv_create( From da72c56846fd3cecc71d307af0a6ff919e7d6d4c Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 14:17:08 -0500 Subject: [PATCH 29/38] Update test_r.yaml Try again (hoping it won't truncate python 3.1x this time. --- .github/workflows/test_r.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index da89144..fa50e11 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -47,8 +47,6 @@ jobs: - name: setup r-reticulate venv & render rmd shell: Rscript {0} - with: - python-version: ${{ matrix.python-version }} run: | library(reticulate) @@ -58,7 +56,7 @@ jobs: ) reticulate::install_miniconda() - reticulate::conda_create("wq_harmonize", python_version=python-version) + reticulate::conda_create("wq_harmonize", python_version = ${{ matrix.python-version }}) reticulate::conda_install("wq_harmonize", packages) #path_to_venv <- virtualenv_create( From e16aad984e567e408a5e2eed02656913d367906a Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Wed, 3 Jul 2024 14:30:32 -0500 Subject: [PATCH 30/38] Update test_r.yaml try as sting? 
--- .github/workflows/test_r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index fa50e11..9a324c1 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -56,7 +56,7 @@ jobs: ) reticulate::install_miniconda() - reticulate::conda_create("wq_harmonize", python_version = ${{ matrix.python-version }}) + reticulate::conda_create("wq_harmonize", python_version = "${{ matrix.python-version }}") reticulate::conda_install("wq_harmonize", packages) #path_to_venv <- virtualenv_create( From b125f65631d2395dcf3c15a3b3444afdaafd7389 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 09:51:38 -0500 Subject: [PATCH 31/38] Dynamic config pyproject.toml Use dynamic to specify depends from requirements.txt. This should be more maintainable. and may resolve some of the install issues reveries were having. setuptools>=61.0 already required. --- pyproject.toml | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0af85cc..3c603ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,20 +21,15 @@ classifiers = [ "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ] -dependencies = [ - "geopandas", - "pint", - "dataretrieval", - "pandas", - "numpy", - "requests", -] - -[project.optional-dependencies] -test = ["pytest"] -doc = ["sphinx"] +dynamic = ["dependencies", "optional-dependencies"] [project.urls] "Homepage" = "https://github.com/USEPA/harmonize-wq" "Documentation" = "https://usepa.github.io/harmonize-wq/" "Bug Tracker" = "https://github.com/USEPA/harmonize-wq/issues" + +[tool.setuptools.dynamic] +dependencies = { file = ["requirements.txt"] } + +[tool.setuptools.dynamic.optional-dependencies] +dev = { file = ["requirements-dev.txt"] } From 4042da1638a2017d14915f1fa26ac2c4d499c8df Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 
11:09:51 -0500 Subject: [PATCH 32/38] Update test_r.yaml Upload artifact at end to examine rendered html --- .github/workflows/test_r.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 9a324c1..f61861e 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -69,3 +69,9 @@ jobs: reticulate::py_install("git+https://github.com/USEPA/harmonize-wq.git", pip = TRUE, envname = "wq_harmonize") rmarkdown::render(input = "demos/Harmonize_Pensacola.Rmd") + + - name: Upload artifact + uses: actions/upload-artifact@v4 + with: + # Upload entire demos folder + path: '/demos' From c7862424ac31556641a445566d7b160497dbbf36 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 11:22:02 -0500 Subject: [PATCH 33/38] Update artifact test_r.yaml Based on examples on repo --- .github/workflows/test_r.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index f61861e..1d7ae38 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -73,5 +73,6 @@ jobs: - name: Upload artifact uses: actions/upload-artifact@v4 with: + name: demos-artifact # Upload entire demos folder - path: '/demos' + path: /demos From c6ae473d8096553178fef79241521515813fc1ac Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 11:30:51 -0500 Subject: [PATCH 34/38] Update test_r.yaml --- .github/workflows/test_r.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 1d7ae38..85bef50 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -75,4 +75,4 @@ jobs: with: name: demos-artifact # Upload entire demos folder - path: /demos + path: './demos' From 0d5c07b91ab83959f63c789b366477484f449ef7 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 11:40:35 -0500 
Subject: [PATCH 35/38] Update test_r.yaml Only upload on one runner (win 3.11 --- .github/workflows/test_r.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test_r.yaml b/.github/workflows/test_r.yaml index 85bef50..bd783c8 100644 --- a/.github/workflows/test_r.yaml +++ b/.github/workflows/test_r.yaml @@ -71,6 +71,7 @@ jobs: rmarkdown::render(input = "demos/Harmonize_Pensacola.Rmd") - name: Upload artifact + if: ${{ (matrix.os == 'windows-latest') && (matrix.python-version == 3.11) }} uses: actions/upload-artifact@v4 with: name: demos-artifact From 594dc109d3040cb3b91bf0eac667f4beba65f87e Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 12:20:39 -0500 Subject: [PATCH 36/38] Wordsmith Harmonize_Pensacola.Rmd Updated text to be more accurate with how reticulate can be used --- demos/Harmonize_Pensacola.Rmd | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index af08443..6d6a0dd 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -55,15 +55,12 @@ data comparability and any thresholds for acceptance or rejection. ## Installation & Setup -An installation of Conda is required to run EPA's harmonize-wq package -using R/reticulate. +Using R/reticulate requires an installation of Python to bind to, and EPA's harmonize-wq package must be run from a python environment with the packages it depends on. -There are multiple installers available for Conda (see: -). +For environment management, reticulate requires either conda (recommended) or virtualenv. -One example installer is -[miniforge](https://github.com/conda-forge/miniforge). We use miniforge3 -in these examples. +There are multiple installers available for [Conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html). 
+The examples use miniforge3, one of several versions of [miniforge](https://github.com/conda-forge/miniforge). In R, [miniconda](https://docs.anaconda.com/miniconda/) can be installed using reticulate::install_miniconda(). #### Option 1: Install the harmonize-wq Package Using the Command Line @@ -80,9 +77,11 @@ To install and set up the harmonize-wq package using the command line: - conda create --name wq_harmonize - - activate wq_harmonize + - conda activate wq_harmonize - - conda install geopandas pip dataretrieval pint + - conda install dependencies (from requirements.txt): + + > conda install "numpy<2.0", "pandas<2.0", "geopandas>=0.10.2, <0.13", "pint>=0.18", "dataretrieval>=1.0, <1.0.5", "pip" - pip install harmonize-wq (dev-version shown): @@ -114,14 +113,14 @@ update the last line of code below to specify your conda.exe location ```{r, results = 'hide'} # update the 'dir' in this chuck to specify the location of conda.exe on your computer -# Note: that the environment name may need to include the full path (e.g. "C:/Users/USERNAME/AppData/Local/miniforge3/Scripts/conda.exe") +# Note: that the environment name may need to include the full path (e.g. "C:/Users//AppData/Local/miniforge3/Scripts/conda.exe") # options(reticulate.conda_binary = "~/AppData/Local/miniforge3/Scripts/conda.exe") ``` Next, update the code chunk below to create a new Python environment in the envs folder called "wq_harmonize". Note that the environment name may need to include the full path (e.g. -"C:/Users/USERNAME/AppData/Local/miniforge3/envs/wq_harmonize") +"C:/Users//AppData/Local/miniforge3/envs/wq_harmonize") ```{r, results = 'hide'} # reticulate::conda_create("~/AppData/Local/miniforge3/envs/wq_harmonize") @@ -131,9 +130,11 @@ Install the following python packages to the newly created Python environment called "wq_harmonize". 
```{r, results = 'hide'} -#reticulate::conda_install("wq_harmonize", "geopandas") -#reticulate::conda_install("wq_harmonize", "pint") -#reticulate::conda_install("wq_harmonize", "dataretrieval") +# packages = c( +# "numpy<2.0", "pandas<2.0", "geopandas>=0.10.2, <0.13", "pint>=0.18", +# "dataretrieval>=1.0, <1.0.5", "pip" +# ) +# reticulate::conda_install("wq_harmonize", packages) ``` Uncomment to install EPA's harmonize-wq package most recent release or From 0adbba12b492c12bd655c4915b402519e8520c36 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 12:25:13 -0500 Subject: [PATCH 37/38] separators Harmonize_Pensacola.Rmd No separators for list to conda --- demos/Harmonize_Pensacola.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demos/Harmonize_Pensacola.Rmd b/demos/Harmonize_Pensacola.Rmd index 6d6a0dd..2fe0a2a 100644 --- a/demos/Harmonize_Pensacola.Rmd +++ b/demos/Harmonize_Pensacola.Rmd @@ -81,7 +81,7 @@ To install and set up the harmonize-wq package using the command line: - conda install dependencies (from requirements.txt): - > conda install "numpy<2.0", "pandas<2.0", "geopandas>=0.10.2, <0.13", "pint>=0.18", "dataretrieval>=1.0, <1.0.5", "pip" + > conda install "numpy<2.0" "pandas<2.0" "geopandas>=0.10.2, <0.13" "pint>=0.18" "dataretrieval>=1.0, <1.0.5" "pip" - pip install harmonize-wq (dev-version shown): From 646a5c3b5af1cd5211b3b7da98b1c2789daa30e3 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Fri, 5 Jul 2024 18:05:21 -0500 Subject: [PATCH 38/38] Fix typos on README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3509b12..53a0e65 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Recognizing the definition of analysis-ready varies depending on the analysis, t Domain experts must decide what data meets their quality standards for data comparability and any thresholds for acceptance or rejection. 
-For more complete, documentsion see [docs](https://usepa.github.io/harmonize-wq/index.html). For more complete tutorial information, see: [demos](https://github.com/USEPA/harmonize-wq/tree/main/demos) +For complete documentation see [docs](https://usepa.github.io/harmonize-wq/index.html). For more complete tutorial information see: [demos](https://github.com/USEPA/harmonize-wq/tree/main/demos) ## Quick Start