From 59412c3c8fd3e78730eea24ef505f811d5d32460 Mon Sep 17 00:00:00 2001 From: Romain Caneill Date: Fri, 10 May 2024 11:51:30 +0200 Subject: [PATCH 1/3] add jupyter as it is needed for doc --- requirements_dev.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 9283214..223dddb 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,4 +4,5 @@ sphinx sphinx_rtd_theme sphinxcontrib-spelling nbsphinx -nbsphinx_link \ No newline at end of file +nbsphinx_link +jupyterlab From b548e2d416500ebd3b6a60ad529883493d13be44 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 9 Jul 2024 09:57:53 -0500 Subject: [PATCH 2/3] Update requirements_dev.txt Co-authored-by: Romain Caneill --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 223dddb..31c7331 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -5,4 +5,4 @@ sphinx_rtd_theme sphinxcontrib-spelling nbsphinx nbsphinx_link -jupyterlab +notebook From 5b79f0f5fb5cc5850827dffd51a3bf0537990e85 Mon Sep 17 00:00:00 2001 From: Justin Bousquin Date: Tue, 9 Jul 2024 12:54:22 -0500 Subject: [PATCH 3/3] Update domains.py Review edits: suggestion (domains.py): In harmonize_TADA_dict, we could use a groupby operation to avoid looping through the dataframe using Python. 
TOCHECK --- harmonize_wq/domains.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/harmonize_wq/domains.py b/harmonize_wq/domains.py index 2f7f1b9..07cf3ae 100644 --- a/harmonize_wq/domains.py +++ b/harmonize_wq/domains.py @@ -136,9 +136,8 @@ def harmonize_TADA_dict(): csv = f'{TADA_DATA_URL}develop/inst/extdata/HarmonizationTemplate.csv' df = pandas.read_csv(csv) # Read csv url to DataFrame full_dict = {} # Setup results dict - # Loop over one unique characteristicName at a time - for char in df['TADA.CharacteristicName'].unique(): - sub_df = df[df['TADA.CharacteristicName']==char] # Mask by char + # Build dict one unique characteristicName at a time + for char, sub_df in df.groupby('TADA.CharacteristicName'): full_dict[char] = char_tbl_TADA(sub_df, char) # Build dictionary # Domains to check agaisnt