From 1eb097399de44cdbcf1716c58f67c9116f2bb816 Mon Sep 17 00:00:00 2001
From: "Sara A. Miskovich" <saramiskovich@gmail.com>
Date: Mon, 24 Jun 2024 22:17:09 -0700
Subject: [PATCH] add basic usage demo, clean up nrel module

---
 .../marimo_notebooks/nrel_buildstock_demo.py  | 229 ++++++++++++++++++
 examples/marimo_notebooks/resstock_demo.py    | 175 -------------
 sg2t/io/loadshapes/nrel/nbs.py                | 165 ++-----------
 3 files changed, 254 insertions(+), 315 deletions(-)
 create mode 100644 examples/marimo_notebooks/nrel_buildstock_demo.py
 delete mode 100644 examples/marimo_notebooks/resstock_demo.py

diff --git a/examples/marimo_notebooks/nrel_buildstock_demo.py b/examples/marimo_notebooks/nrel_buildstock_demo.py
new file mode 100644
index 0000000..d998cc5
--- /dev/null
+++ b/examples/marimo_notebooks/nrel_buildstock_demo.py
@@ -0,0 +1,229 @@
+import marimo
+
+__generated_with = "0.6.22"
+app = marimo.App()
+
+
+@app.cell
+def __(mo):
+    mo.md("# Accessing NREL's ResStock and ComStock Databases")
+    return
+
+
+@app.cell
+def __():
+    # Third-party imports; returned so cells that list them as parameters can use them.
+    import marimo as mo
+    import matplotlib.pyplot as plt
+
+    # sg2t NREL BuildStock module: API client, BuildStock class, and the type lists.
+    from sg2t.io.loadshapes.nrel.nbs import BuildStock, API
+    from sg2t.io.loadshapes.nrel.naming import BUILDING_TYPES, HOME_TYPES
+    return API, BUILDING_TYPES, BuildStock, HOME_TYPES, mo, plt
+
+
+@app.cell
+def __(mo):
+    mo.md("The NREL Building Stock (NBS) `sg2t` module has two classes that deal with the data. The first one is the `API` class to access the data from the S3 bucket. The second is the `BuildStock` class that includes methods for cleaning and post-processing the data (mostly to format it to the `sg2t` standard).")
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md("## Step 1: Pull the data using the API class")
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md(  # keyword names below match the keys unpacked into get_data_by_county(**metadata)
+        """
+        Currently, the API class (under `sg2t.io.loadshapes.nrel.nbs.API`) provides access to the ResStock and ComStock timeseries aggregates by state, county (for 2021 release year), and climate (Building America and IECC zones).
+
+        You can access each with the following methods:
+
+        ```python
+        api = API()
+        sector = "resstock"
+        btype = "single-family_detached"
+
+        # by state
+        state = "CA"
+        df = api.get_data_by_state(sector=sector, state=state, building_type=btype)
+
+        # by county
+        county = "Alameda"
+        df = api.get_data_by_county(sector=sector, state=state, county_name=county, building_type=btype)
+
+        # By climate (Building America)
+        climate = "hot-dry"
+        df = api.get_data_by_climate_ba(sector=sector, climate=climate, building_type=btype)
+
+        # or for IECC
+        climate = "1A"
+        df = api.get_data_by_climate_iecc(sector=sector, climate=climate, building_type=btype)
+        ```
+        """
+    )
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md("The home and building types for both sectors are built into the `sg2t.io.loadshapes.nrel.naming` module as \"HOME_TYPES\" and \"BUILDING_TYPES\", respectively.")
+    return
+
+
+@app.cell
+def __(BUILDING_TYPES, HOME_TYPES):
+    print("Residential home types: ", HOME_TYPES)
+    print("Commercial building types: ", BUILDING_TYPES)
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md(
+        """
+        Working Example
+
+        ```python
+        # Create an API instance 
+        api = API()
+
+        # Configuration 
+        metadata = {
+            "sector" : "Resstock",
+            "state" : "CA",
+            "county_name" : "Alameda",
+            "building_type" : HOME_TYPES[1]
+            }
+
+        # Pull data
+        # Needs:
+        # - By state: sector, state and building type
+        # - By county: sector, state and county and building type
+        # - By climate: sector, climate and building type
+        dataset = api.get_data_by_county(**metadata)
+
+        # Raw NREL data
+        dataset.head(3)
+        ```
+        """
+    )
+    return
+
+
+@app.cell
+def __(API, HOME_TYPES):
+    # Create an API instance 
+    api = API()
+
+    # Configuration 
+    metadata = {
+        "sector" : "Resstock",
+        "state" : "CA",
+        "county_name" : "Alameda",
+        "building_type" : HOME_TYPES[1]
+        }
+
+    # Pull data
+    # Needs:
+    # - By state: sector, state and building type
+    # - By county: sector, state and county and building type
+    # - By climate: sector, climate and building type
+    dataset = api.get_data_by_county(**metadata)
+    return api, dataset, metadata
+
+
+@app.cell
+def __(dataset):
+    # Raw NREL data
+    dataset.head(3)
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md("## Step 2: Using the BuildStock class")
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md("This step is for when you'd like to do some data cleaning/analysis in `sg2t` (or elsewhere), e.g., normalization, cleaner column names.")
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md(
+        """
+        ### Example
+        ```python
+        blds_dataset = BuildStock(data=dataset, metadata=metadata)
+
+        # To normalize by square footage 
+        blds_dataset.normalize_by_sqft();
+        ```
+        """
+    )
+    return
+
+
+@app.cell
+def __(BuildStock, dataset, metadata):
+    # Wrap the raw county-level frame pulled above together with its metadata.
+    blds_dataset = BuildStock(data=dataset, metadata=metadata)
+    # Populates `data_normalized` (None until called); raises without county-level data.
+    blds_dataset.normalize_by_sqft()
+
+    blds_dataset.data_normalized.head(3)
+    return blds_dataset,
+
+
+@app.cell
+def __(mo):
+    mo.md(
+        """
+        To plot the energy consumption (raw)
+        ```python
+        blds_dataset.data.plot(y=["out.site_energy.total.energy_consumption"])
+        ```
+        """
+    )
+    return
+
+
+@app.cell
+def __(blds_dataset):
+    blds_dataset.data.plot(y=["out.site_energy.total.energy_consumption"])
+    return
+
+
+@app.cell
+def __(mo):
+    mo.md(
+        """
+        To plot the normalized energy consumption
+        ```python
+        blds_dataset.data_normalized.plot(y=["out.site_energy.total.energy_consumption[kW/sf]"])
+        ```
+        """
+    )
+    return
+
+
+@app.cell
+def __(blds_dataset):
+    blds_dataset.data_normalized.plot(y=["out.site_energy.total.energy_consumption[kW/sf]"])
+    return
+
+
+@app.cell
+def __():
+    return
+
+
+if __name__ == "__main__":
+    app.run()
diff --git a/examples/marimo_notebooks/resstock_demo.py b/examples/marimo_notebooks/resstock_demo.py
deleted file mode 100644
index ec3d56a..0000000
--- a/examples/marimo_notebooks/resstock_demo.py
+++ /dev/null
@@ -1,175 +0,0 @@
-import marimo
-
-__generated_with = "0.2.13"
-app = marimo.App()
-
-
-@app.cell
-def __():
-    # Requirements
-    import os, sys
-    import marimo as mo
-    import pandas as pd
-    import matplotlib.pyplot as plt
-
-    from sg2t.io.loadshapes.nrel.nbs import BuildStock, API
-    return API, BuildStock, mo, os, pd, plt, sys
-
-
-@app.cell
-def __():
-    from sg2t.io.loadshapes.nrel.naming import BUILDING_TYPES, HOME_TYPES
-    return BUILDING_TYPES, HOME_TYPES
-
-
-@app.cell
-def __(BUILDING_TYPES):
-    BUILDING_TYPES
-    return
-
-
-@app.cell
-def __():
-    # Configuration 
-    metadata = {
-        "sector" : "ResStock",
-        "state" : "CA",
-        "county_name" : "Alameda",
-        "building_type" : "single-family_detached"
-        }
-    return metadata,
-
-
-@app.cell
-def __(API):
-    ""# Create an API instance 
-    api = API()
-    return api,
-
-
-@app.cell
-def __(api, metadata):
-    # Pull data first
-    # Needs:
-    # - By state: sector, state and building type
-    # - By county: sector, state and county and building type
-    # - By climate: sector, climate and building type
-    dataset = api.get_data_by_county(**metadata)
-    return dataset,
-
-
-@app.cell
-def __(BuildStock, dataset, metadata):
-    res = BuildStock(data=dataset, metadata=metadata) # instantiate with dataframe with index as dt timestamp
-    return res,
-
-
-@app.cell
-def __(res):
-    res.data.head(1)
-    return
-
-
-@app.cell
-def __(res):
-    res.normalize_by_sqft() # can only do for county data (for now?)
-    return
-
-
-@app.cell
-def __(res):
-    res.data.plot(y=["out.site_energy.total.energy_consumption"])
-    return
-
-
-@app.cell
-def __(res):
-    res.data_normalized.plot(y=["out.site_energy.total.energy_consumption[kW/sf]"])
-    return
-
-
-@app.cell
-def __():
-    # by climate
-    return
-
-
-@app.cell
-def __():
-    metadata_com = {
-        "sector" : "comstock",
-        "climate" : "hot-dry",
-        "building_type" : "largehotel"
-    }
-
-    # metadata_res = {
-    #     "sector" : "ResStock",
-    #     "climate" : "hot-dry",
-    #     "building_type" : "single-family_detached"
-    #     }
-    return metadata_com,
-
-
-@app.cell
-def __(api, metadata_com):
-    data_com_cli = api.get_data_by_climate_ba(**metadata_com)
-    return data_com_cli,
-
-
-@app.cell
-def __(data_com_cli):
-    data_com_cli
-    return
-
-
-@app.cell
-def __(BuildStock, data_com_cli, metadata_com):
-    com = BuildStock(data=data_com_cli, metadata=metadata_com)
-    return com,
-
-
-@app.cell
-def __(com):
-    com.normalize_by_sqft()
-    return
-
-
-@app.cell
-def __():
-    metadata_test = {
-        "sector" : "comstock",
-        "building_type" : "largehotel"
-    }
-    return metadata_test,
-
-
-@app.cell
-def __(BuildStock, data_com_cli, metadata_test):
-    com_test = BuildStock(data=data_com_cli, metadata=metadata_test)
-    return com_test,
-
-
-@app.cell
-def __():
-    # the way I'm planning to do it doesn't work because I can't pass a new kwarg to get data now, can I? or maybe I can make it a new kwargs... ugh
-
-    # doesn't make sense to have incompatible climate, but i either ask users to fix it by setting up a check (either county/state or climate in there) or I change the system
-    return
-
-
-app._unparsable_cell(
-    r"""
-    # by state
-    com_state_meta = {
-        \"sector\" : \"comstock\",
-        \"state\" : \"MI\",
-        \"building_type\" : \"largehotel\"
-    }
-    com_state = 
-    """,
-    name="__"
-)
-
-
-if __name__ == "__main__":
-    app.run()
diff --git a/sg2t/io/loadshapes/nrel/nbs.py b/sg2t/io/loadshapes/nrel/nbs.py
index f6c19ac..95a7eb9 100644
--- a/sg2t/io/loadshapes/nrel/nbs.py
+++ b/sg2t/io/loadshapes/nrel/nbs.py
@@ -24,79 +24,21 @@ class BuildStock(IOBase):
      dataset into sg2t tools.
     """
     def __init__(self,
-                 data, # TODO: update docstrings
-                 metadata,  # TODO: implement or remove
-                 api=None,
-                 config_name="config.ini", # TODO: implement or remove
-                 config_key="io.nrel.api", # TODO: implement or remove
+                 data,
+                 metadata,
+                 api=None
                  ):
         """ ResStock object initialization.
-
-        Parameters
-        ----------
-        config_name : str
-            Name of configuration file in sg2t.config, optional.
-
-        config_key : str
-            Key in config corresponding to this class, required if
-            config_name is given.
-
-        metadata_file : str
-            Full path to JSON file containing the metadata for this
-             type of data.
+        # TODO: update docstrings
         """
-        # TODO: drop base class?
-        # super().__init__(config_name, config_key, metadata_file)
         self.raw_data = data
-        # self.kwargs = kwargs
-        # self.weather_gisjoint = self.load_weather_location()
         self.data = self._format_data()
         self.data_normalized = None
         self.metadata = metadata
         self.api = api
-        self.validate_metadata()
-
-    # def load_weather_location(self):
-    #     # TODO: check that metadata exists
-    #     if not self.metadata:
-    #         return "None"
-    #     try:
-    #         gisj_metadata = self.metadata["file"]["GISJOINT ID"]
-    #         return gisj_metadata
-    #     except KeyError:
-    #         return "None"
-
-    def validate_metadata(self):
-        # TODO: also check that there's no overlap? (e.g. both county and climate keys are there)
-        try:
-            assert "sector" in self.metadata
-            assert ("state" in self.metadata) or \
-                   ("state" in self.metadata and "county" in self.metadata ) or \
-                   ("climate" in self.metadata)
-        except AssertionError:
-            print("Please specify the sector in the metadata, and: \n \
-                  - the state, or \n \
-                  - the state and county, or \n \
-                  - the climate")
+        self._validate_metadata()
 
     def _format_data(self):
-        """Changes the format of the loaded tmy3 data self.data to follow
-        a standard format with standard column names. See `mapping.py`.
-
-        This only reorders the columns, putting required ones first, and others
-        next, and removes redundant/unused columns.
-        """
-        # self.keys_map = get_map(self.metadata_file)
-        # # Save original dataframe
-        # raw_data = self.data
-        # # Create new dataframe
-        # cols = list(self.keys_map.keys())
-        # data = pd.DataFrame(columns=cols)
-        # for key in list(self.keys_map.keys()):
-        #     print(key, self.keys_map[key])
-        #     data[key] = raw_data[self.keys_map[key]]
-        #
-        # self.data = data
 
         self.data = self.raw_data.copy()
 
@@ -115,6 +57,20 @@ def _format_data(self):
 
         return self.data
 
+    def _validate_metadata(self):
+        """Print a hint if the metadata keys match no supported query type."""
+        # Explicit check rather than `assert`, which `python -O` strips out.
+        valid_key_sets = (
+            {'sector', 'state', 'building_type'},
+            {'sector', 'state', 'county_name', 'building_type'},
+            {'sector', 'climate', 'building_type'},
+        )
+        if set(self.metadata) not in valid_key_sets:
+            print("Please specify the sector and building_type in the metadata, and: \n \
+                  - the state, or \n \
+                  - the state and county_name, or \n \
+                  - the climate")
+
     def normalize_by_sqft(self):
         """ Normalize county-level data by square footage
         and return energy/SF for each building type
@@ -122,10 +78,12 @@ def normalize_by_sqft(self):
         if "county" not in self.data.columns:
             raise Exception("Must have county level data specified to call this method.")
 
-        # create API object # TODO: maybe change how this is implemented
+        # create API object
+        # TODO: maybe change how this is implemented
+        # TODO: maybe can pass metadata if cached to speed it up
         self.api = self.api if self.api else API()
 
-        # TODO: why does this take 17s?
+        # TODO: speed up (takes 17s)
         # get SF per build type
         meta = self.api.get_metadata(self.metadata["sector"])
         area = meta.groupby(["county", "building_type"]).sum()
@@ -145,7 +103,7 @@ def normalize_by_sqft(self):
         self.data_normalized.set_index(["county", "building_type"], inplace=True)
         self.data_normalized = self.data_normalized.join(area)
 
-        dt = 0.25 # this shouldn't change for ResStock and ComStock, TODO: confirm this
+        dt = 0.25 # same for ResStock and ComStock
         columns = []
         for column in self.data_normalized.columns:
             if column.endswith("consumption"):
@@ -229,25 +187,9 @@ class API:
     AWS S# bucket.
     https://data.openei.org/submissions/4520
     """
-    def __init__(self,
-                 # source: str,
-                 config_name="config.ini",
-                 config_key="io.nrel.api"
-                 ):
+    def __init__(self):
         """ API object initialization
-
-        Parameters
-        ----------
-        source: str
-            Desired source of data to pull. Currently supports: ResStock, ComStock.
-
-        config_name : str
-            Name of configuration file in sg2t.config or cache directory to obtain API path settings.
         """
-        self.source = None
-        self.config_name = config_name
-        self.config_key = config_key
-        self.config = self.load_config(self.config_name, self.config_key)
         # API paths
         # 2021 release has county breakdown
         # 2021 release does *not* take upgrades as input
@@ -268,63 +210,6 @@ def __init__(self,
         # Geographic information
         self.df_geoinfo = self.get_geoinfo()
 
-        # # API options
-        # self.api_options = {
-        #             "resstock" :
-        #                 { "state" : self.get_data_resstock_by_state, # state, hometype
-        #                   "county" : self.get_data_resstock_by_county, # state, county, hometype
-        #                   "climate-ba" : self.get_data_resstock_by_climatezone,  # climate, hometype
-        #                   "climate-iecc" : self.get_data_resstock_by_climatezone_iecc, # climate, hometype
-        #                   },
-        #
-        #             "comstock" :
-        #                 { "state" : self.get_data_comstock_by_state, # state, hometype
-        #                   "county" : self.get_data_comstock_by_county, # state, county, hometype
-        #                   "climate-ba" : self.get_data_comstock_by_climatezone,  # climate, hometype
-        #                   "climate-iecc" : self.get_data_comstock_by_climatezone_iecc, # climate, hometype
-        #                   },
-        #        }
-
-    # def get_data(self, sector, building_type, state=None, county=None, climate=None):
-    #     sector = sector.lower()
-    #
-    #     if (state and climate) or (county and climate):
-    #         # if state and county then county level is taken
-    #         raise "Please specify the query type (state, state/county, or climate)."
-    #
-    #     # Get dataframe
-    #     if state:
-    #         if county:
-    #             return self.api_options[sector]["county"](state=state, county_name=county, building_type=building_type)
-    #         else:
-    #             return self.api_options[sector]["state"](state=state, building_type=building_type)
-    #     elif climate:
-    #         if climate in self.climate_zones_ba:
-    #             return self.api_options[sector]["climate-ba"](climate=climate, building_type=building_type)
-    #         elif climate in self.climate_zones_iecc:
-    #             return self.api_options[sector]["climate-iecc"](climate=climate, building_type=building_type)
-    #         else:
-    #             raise Exception("Invalid option. Please pass either state, county, or climate info.")
-
-    def load_config(self, config_name=None, key=None):
-        """Load configuration.
-
-        PARAMETERS
-        ----------
-        config_name : str
-            Name of configuration file in sg2t.config, optional.
-
-        key : str
-            Key in config corresponding to this class, required if
-            config_name is given.
-
-        RETURNS
-        -------
-        config : dict
-            Configuration dict, if any, otherwise None.
-        """
-        return load_config(config_name, key)
-
     def get_geoinfo(self):
         # This file is identical between ResStock and Comstock
         sector = "resstock"