From 59412c3c8fd3e78730eea24ef505f811d5d32460 Mon Sep 17 00:00:00 2001
From: Romain Caneill <romain.caneill@ens-lyon.org>
Date: Fri, 10 May 2024 11:51:30 +0200
Subject: [PATCH 1/3] add jupyter as it is needed for doc

---
 requirements_dev.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements_dev.txt b/requirements_dev.txt
index 9283214..223dddb 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -4,4 +4,5 @@ sphinx
 sphinx_rtd_theme
 sphinxcontrib-spelling
 nbsphinx
-nbsphinx_link
\ No newline at end of file
+nbsphinx_link
+jupyterlab

From b548e2d416500ebd3b6a60ad529883493d13be44 Mon Sep 17 00:00:00 2001
From: Justin Bousquin <bousquin.justin@epa.gov>
Date: Tue, 9 Jul 2024 09:57:53 -0500
Subject: [PATCH 2/3] Update requirements_dev.txt

Co-authored-by: Romain Caneill <romain.caneill@ens-lyon.org>
---
 requirements_dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements_dev.txt b/requirements_dev.txt
index 223dddb..31c7331 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -5,4 +5,4 @@ sphinx_rtd_theme
 sphinxcontrib-spelling
 nbsphinx
 nbsphinx_link
-jupyterlab
+notebook

From 5b79f0f5fb5cc5850827dffd51a3bf0537990e85 Mon Sep 17 00:00:00 2001
From: Justin Bousquin <bousquin.justin@epa.gov>
Date: Tue, 9 Jul 2024 12:54:22 -0500
Subject: [PATCH 3/3] Update domains.py

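Review edits: suggestion (domains.py): in harmonize_TADA_dict, use a pandas groupby on 'TADA.CharacteristicName' instead of masking the DataFrame once per unique value in a Python loop. TOCHECK: groupby sorts group keys and drops NaN keys by default, so the order of entries in the resulting dict may differ from the previous unique()-based loop.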
---
 harmonize_wq/domains.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/harmonize_wq/domains.py b/harmonize_wq/domains.py
index 2f7f1b9..07cf3ae 100644
--- a/harmonize_wq/domains.py
+++ b/harmonize_wq/domains.py
@@ -136,9 +136,8 @@ def harmonize_TADA_dict():
     csv = f'{TADA_DATA_URL}develop/inst/extdata/HarmonizationTemplate.csv'
     df = pandas.read_csv(csv)  # Read csv url to DataFrame
     full_dict = {}  # Setup results dict
-    # Loop over one unique characteristicName at a time
-    for char in df['TADA.CharacteristicName'].unique():
-        sub_df = df[df['TADA.CharacteristicName']==char]  # Mask by char
+    # Build dict one unique characteristicName at a time
+    for char, sub_df in df.groupby('TADA.CharacteristicName'):
         full_dict[char] = char_tbl_TADA(sub_df, char)  # Build dictionary
 
     # Domains to check agaisnt