From 9434fca8abd92da78102bd6a94c15087cd5f41a8 Mon Sep 17 00:00:00 2001 From: Bousquin Date: Mon, 5 Feb 2024 15:33:58 -0600 Subject: [PATCH] Fix up docs for Fraction --- harmonize_wq/harmonize.py | 8 +++- harmonize_wq/wq_data.py | 78 +++++++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 8 deletions(-) diff --git a/harmonize_wq/harmonize.py b/harmonize_wq/harmonize.py index f8fbb39..c3c2af7 100644 --- a/harmonize_wq/harmonize.py +++ b/harmonize_wq/harmonize.py @@ -467,8 +467,14 @@ def harmonize_all(df_in, errors='raise'): >>> df1.shape (359505, 35) + When running the function there may be read outs or warnings, as things are + encountered such as unexpected nutrient sample fractions: + >>> from harmonize_wq import harmonize >>> df_result_all = harmonize.harmonize_all(df1) + 2 Phosphorus sample fractions not in frac_dict + 1 Phosphorus sample fractions not in frac_dict found in expected domains, mapped to "Other_Phosphorus" + >>> df_result_all OrganizationIdentifier ... Other_Phosphorus 0 21FLHILL_WQX ... NaN @@ -654,7 +660,7 @@ def harmonize(df_in, char_val, units_out=None, errors='raise', if out_col=='Phosphorus': frac_dict = {'TP_Phosphorus': ['Total'], 'TDP_Phosphorus': ['Dissolved'], - 'Other_Phosphorus': [''],} + 'Other_Phosphorus': ['', nan],} else: frac_dict = 'TADA' frac_dict = wqp.fraction(frac_dict) # Run sample fraction on WQP diff --git a/harmonize_wq/wq_data.py b/harmonize_wq/wq_data.py index e500762..7b8d752 100644 --- a/harmonize_wq/wq_data.py +++ b/harmonize_wq/wq_data.py @@ -817,11 +817,78 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, Examples -------- - Not fully implemented with TADA table yet. + Build pandas DataFrame to use as input: + + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l', 'mg/kg',], + ... 'ResultMeasureValue': ['1.0', '10',], + ... 'ResultSampleFractionText': ['Dissolved', ''], + ... 
}) + >>> df + CharacteristicName ... ResultSampleFractionText + 0 Phosphorus ... Dissolved + 1 Phosphorus ... + + [2 rows x 4 columns] + + Build WQ Characteristic Data class from pandas DataFrame: + + >>> from harmonize_wq import wq_data + >>> wq = wq_data.WQCharData(df, 'Phosphorus') + + Go through required checks and conversions: + + >>> wq.check_units() + >>> dimension_dict, mol_list = wq.dimension_fixes() + >>> wq.replace_unit_by_dict(dimension_dict, wq.measure_mask()) + >>> wq.moles_convert(mol_list) + >>> wq.convert_units() + >>> wq.df.columns + Index(['CharacteristicName', 'ResultMeasure/MeasureUnitCode', + 'ResultMeasureValue', 'ResultSampleFractionText', 'Units', 'Phosphorus', + 'QA_flag'], + dtype='object') + >>> wq.df['Phosphorus'] + 0 1.0 milligram / liter + 1 10.000000000000002 milligram / liter + Name: Phosphorus, dtype: object + + These results may have different, non-comparable sample fractions. First, + split results using a provided frac_dict (as used in harmonize()): + + >>> from numpy import nan + >>> frac_dict = {'TP_Phosphorus': ['Total'], + ... 'TDP_Phosphorus': ['Dissolved'], + ... 'Other_Phosphorus': ['', nan],} + >>> wq.fraction(frac_dict) + >>> wq.df.columns + Index(['CharacteristicName', 'ResultMeasure/MeasureUnitCode', + 'ResultMeasureValue', 'ResultSampleFractionText', 'Units', 'Phosphorus', + 'QA_flag', 'TDP_Phosphorus', 'Other_Phosphorus'], + dtype='object') + >>> wq.df[['TDP_Phosphorus', 'Other_Phosphorus']] + TDP_Phosphorus Other_Phosphorus + 0 1.0 milligram / liter NaN + 1 NaN 10.000000000000002 milligram / liter + + Alternatively, the sample fraction lists from TADA can be used; in this case they are added: + + >>> wq.fraction('TADA') + >>> wq.df.columns + Index(['CharacteristicName', 'ResultMeasure/MeasureUnitCode', + 'ResultMeasureValue', 'ResultSampleFractionText', 'Units', 'Phosphorus', + 'QA_flag', 'TDP_Phosphorus', 'Other_Phosphorus', + 'TOTAL PHOSPHORUS_ MIXED FORMS'], + dtype='object') + >>> wq.df[['TOTAL PHOSPHORUS_ MIXED FORMS', 
'Other_Phosphorus']] + TOTAL PHOSPHORUS_ MIXED FORMS Other_Phosphorus + 0 1.0 milligram / liter NaN + 1 NaN 10.000000000000002 milligram / liter """ # Check for sample fraction column harmonize.df_checks(self.df, [fract_col]) - + c_mask = self.c_mask fracs = list(set(self.df[c_mask][fract_col])) # List of fracs in data @@ -831,10 +898,9 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, # Replace bad sample fraction w/ nan self.df = self._replace_in_col(fract_col, ' ', nan, c_mask) fracs.remove(' ') - + df_out = self.df # Set var for easier referencing char = list(set(df_out[self.c_mask]['CharacteristicName']))[0] - # Deal with lack of args if suffix is None: @@ -857,7 +923,7 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, #else: dict was already provided if catch_all not in frac_dict.keys(): frac_dict[catch_all] = ['', nan] - # Make sure catch_all exists + # Make sure catch_all exists if not isinstance(frac_dict[catch_all], list): frac_dict[catch_all] = [frac_dict[catch_all]] @@ -900,8 +966,6 @@ def fraction(self, frac_dict=None, catch_all=None, suffix=None, self.df = df_out - return frac_dict - def dimension_fixes(self): """ Input/output for dimension handling.