From 38ba82a1e97d7df291ff648e2eb5cb7421577460 Mon Sep 17 00:00:00 2001 From: Bousquin Date: Wed, 13 Sep 2023 16:57:10 -0500 Subject: [PATCH] In doc examples, import just DataFrame from pandas (saves 7 char per line and is more consistent) --- harmonize_wq/basis.py | 32 +++++----- harmonize_wq/clean.py | 23 +++---- harmonize_wq/harmonize.py | 130 ++++++++++++++++---------------------- 3 files changed, 79 insertions(+), 106 deletions(-) diff --git a/harmonize_wq/basis.py b/harmonize_wq/basis.py index 97d3ca9..7c42ca6 100644 --- a/harmonize_wq/basis.py +++ b/harmonize_wq/basis.py @@ -94,12 +94,12 @@ def basis_from_unit(df_in, basis_dict, unit_col, basis_col='Speciation'): -------- Build dataFrame for example: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], - ... 'ResultMeasure/MeasureUnitCode': ['mg/l as P', 'mg/kg as P'], - ... 'ProviderName': ['NWIS', 'NWIS',], - ... 'Units': ['mg/l as P', 'mg/kg as P'], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l as P', 'mg/kg as P'], + ... 'ProviderName': ['NWIS', 'NWIS',], + ... 'Units': ['mg/l as P', 'mg/kg as P'], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ProviderName Units 0 Phosphorus mg/l as P NWIS mg/l as P @@ -168,12 +168,12 @@ def basis_from_methodSpec(df_in): -------- Build dataFrame for example: - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], - ... 'MethodSpecificationName': ['as P', nan], - ... 'ProviderName': ['NWIS', 'NWIS',], - ... }) + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], + ... 'MethodSpecificationName': ['as P', nan], + ... 'ProviderName': ['NWIS', 'NWIS',], + ... }) >>> df CharacteristicName MethodSpecificationName ProviderName 0 Phosphorus as P NWIS @@ -228,12 +228,12 @@ def update_result_basis(df_in, basis_col, unit_col): Build dataFrame for example: Note: 'Units' is used to preserve 'ResultMeasure/MeasureUnitCode' - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'CharacteristicName': ['Salinity', 'Salinity',], - ... 'ResultTemperatureBasisText': ['25 deg C', nan,], - ... 'Units': ['mg/mL @25C', 'mg/mL @25C'], - ... }) + >>> df = DataFrame({'CharacteristicName': ['Salinity', 'Salinity',], + ... 'ResultTemperatureBasisText': ['25 deg C', nan,], + ... 'Units': ['mg/mL @25C', 'mg/mL @25C'], + ... }) >>> df CharacteristicName ResultTemperatureBasisText Units 0 Salinity 25 deg C mg/mL @25C diff --git a/harmonize_wq/clean.py b/harmonize_wq/clean.py index 1a3d72f..7dcc30b 100644 --- a/harmonize_wq/clean.py +++ b/harmonize_wq/clean.py @@ -29,13 +29,12 @@ def datetime(df_in): Build dataFrame for example: - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'ActivityStartDate': ['2004-09-01', - '2004-07-01',], - ... 'ActivityStartTime/Time': ['10:01:00', nan,], - ... 'ActivityStartTime/TimeZoneCode': ['EST', nan], - ... }) + >>> df = DataFrame({'ActivityStartDate': ['2004-09-01', '2004-07-01',], + ... 'ActivityStartTime/Time': ['10:01:00', nan,], + ... 'ActivityStartTime/TimeZoneCode': ['EST', nan], + ... }) >>> df ActivityStartDate ActivityStartTime/Time ActivityStartTime/TimeZoneCode 0 2004-09-01 10:01:00 EST @@ -82,15 +81,11 @@ def harmonize_depth(df_in, units='meter'): Build dataFrame for example: - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'ResultDepthHeightMeasure/MeasureValue': ['3.0', - ... nan, - ... 10], - ... 'ResultDepthHeightMeasure/MeasureUnitCode': ['m', - ... nan, - ... 'ft'], - ... }) + >>> df = DataFrame({'ResultDepthHeightMeasure/MeasureValue': ['3.0', nan, 10], + ... 'ResultDepthHeightMeasure/MeasureUnitCode': ['m', nan, 'ft'], + ... }) >>> df ResultDepthHeightMeasure/MeasureValue ResultDepthHeightMeasure/MeasureUnitCode 0 3.0 m diff --git a/harmonize_wq/harmonize.py b/harmonize_wq/harmonize.py index 19892b9..d1aa518 100644 --- a/harmonize_wq/harmonize.py +++ b/harmonize_wq/harmonize.py @@ -47,13 +47,12 @@ class WQCharData(): -------- Build dataframe to use as input: - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', - 'Temperature, water',], - ... 'ResultMeasure/MeasureUnitCode': [nan, nan], - ... 'ResultMeasureValue': ['1.0', '10.0',], - ... }) + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Temperature, water',], + ... 'ResultMeasure/MeasureUnitCode': [nan, nan], + ... 'ResultMeasureValue': ['1.0', '10.0',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Phosphorus NaN 1.0 @@ -122,8 +121,7 @@ def _coerce_measure(self): self.df = df_out def _unit_mask(self, unit, column=None): - """Get mask that is characteristic specific (c_mask) and has required - units. + """Get mask that is characteristic specific (c_mask) and has required units. """ if column: # TODO: column for in vs out col, not being used, remove? @@ -275,14 +273,12 @@ def check_units(self, flag_col=None): -------- Build DataFrame to use as input: - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', - ... 'Temperature, water', - ... 'Phosphorus',], - ... 'ResultMeasure/MeasureUnitCode': [nan, nan, 'Unknown',], - ... 'ResultMeasureValue': ['1.0', '67.0', '10',], - ... }) + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Temperature, water', 'Phosphorus',], + ... 'ResultMeasure/MeasureUnitCode': [nan, nan, 'Unknown',], + ... 'ResultMeasureValue': ['1.0', '67.0', '10',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Phosphorus NaN 1.0 @@ -351,15 +347,13 @@ def check_basis(self, basis_col='MethodSpecificationName'): -------- Build DataFrame to use as input: - >>> import pandas + >>> from pandas import DataFrame >>> from numpy import nan - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', - ... 'Temperature, water', - ... 'Phosphorus',], - ... 'ResultMeasure/MeasureUnitCode': ['mg/l as P', nan, 'mg/l',], - ... 'ResultMeasureValue': ['1.0', '67.0', '10',], - ... 'MethodSpecificationName': [nan, nan, 'as PO4',], - ... }) + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Temperature, water', 'Phosphorus',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l as P', nan, 'mg/l',], + ... 'ResultMeasureValue': ['1.0', '67.0', '10',], + ... 'MethodSpecificationName': [nan, nan, 'as PO4',], + ... }) >>> df CharacteristicName ... MethodSpecificationName 0 Phosphorus ... NaN @@ -508,14 +502,11 @@ def convert_units(self, default_unit=None, errors='raise'): -------- Build dataframe to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', - ... 'Temperature, water', - ... ], - ... 'ResultMeasure/MeasureUnitCode': ['mg/ml', - ... 'deg C'], - ... 'ResultMeasureValue': ['1.0', '10.0',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Temperature, water',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/ml', 'deg C'], + ... 'ResultMeasureValue': ['1.0', '10.0',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Phosphorus mg/ml 1.0 @@ -563,14 +554,11 @@ def apply_conversion(self, convert_fun, unit, u_mask=None): -------- Build dataframe to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Dissolved oxygen (DO)', - ... 'Dissolved oxygen (DO)', - ... ], - ... 'ResultMeasure/MeasureUnitCode': ['mg/l', - ... '%'], - ... 'ResultMeasureValue': ['1.0', '10.0',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Dissolved oxygen (DO)', 'Dissolved oxygen (DO)',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l', '%'], + ... 'ResultMeasureValue': ['1.0', '10.0',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Dissolved oxygen (DO) mg/l 1.0 @@ -622,13 +610,11 @@ def dimensions_list(self, m_mask=None): -------- Build DataFrame to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', - ... 'Phosphorus',], - ... 'ResultMeasure/MeasureUnitCode': ['mg/l', - ... 'mg/kg',], - ... 'ResultMeasureValue': ['1.0', '10',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l', 'mg/kg',], + ... 'ResultMeasureValue': ['1.0', '10',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Phosphorus mg/l 1.0 @@ -665,13 +651,11 @@ def replace_unit_str(self, old, new, mask=None): -------- Build DataFrame to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Temperature, water', - ... 'Temperature, water',], - ... 'ResultMeasure/MeasureUnitCode': ['deg C', - ... 'deg F',], - ... 'ResultMeasureValue': ['31', '87',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Temperature, water', 'Temperature, water',], + ... 'ResultMeasure/MeasureUnitCode': ['deg C', 'deg F',], + ... 'ResultMeasureValue': ['31', '87',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Temperature, water deg C 31 @@ -715,13 +699,11 @@ def replace_unit_by_dict(self, val_dict, mask=None): -------- Build DataFrame to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Fecal Coliform', - ... 'Fecal Coliform',], - ... 'ResultMeasure/MeasureUnitCode': ['#/100ml', - ... 'MPN',], - ... 'ResultMeasureValue': ['1.0', '10',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Fecal Coliform', 'Fecal Coliform',], + ... 'ResultMeasure/MeasureUnitCode': ['#/100ml', 'MPN',], + ... 'ResultMeasureValue': ['1.0', '10',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Fecal Coliform #/100ml 1.0 @@ -852,13 +834,11 @@ def dimension_fixes(self): -------- Build DataFrame to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus', - ... 'Phosphorus',], - ... 'ResultMeasure/MeasureUnitCode': ['mg/l', - ... 'mg/kg',], - ... 'ResultMeasureValue': ['1.0', '10',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Phosphorus', 'Phosphorus',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l', 'mg/kg',], + ... 'ResultMeasureValue': ['1.0', '10',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Phosphorus mg/l 1.0 @@ -914,13 +894,11 @@ def moles_convert(self, mol_list): -------- Build DataFrame to use as input: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Carbon', - ... 'Carbon',], - ... 'ResultMeasure/MeasureUnitCode': ['mg/l', - ... 'umol',], - ... 'ResultMeasureValue': ['1.0', '0.265',], - ... }) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Carbon', 'Carbon',], + ... 'ResultMeasure/MeasureUnitCode': ['mg/l', 'umol',], + ... 'ResultMeasureValue': ['1.0', '0.265',], + ... }) >>> df CharacteristicName ResultMeasure/MeasureUnitCode ResultMeasureValue 0 Carbon mg/l 1.0 @@ -980,8 +958,8 @@ def df_checks(df_in, columns=None): Check dataframe for column: - >>> import pandas - >>> df = pandas.DataFrame({'CharacteristicName': ['Phosphorus'],}) + >>> from pandas import DataFrame + >>> df = DataFrame({'CharacteristicName': ['Phosphorus'],}) >>> df CharacteristicName 0 Phosphorus