From 1eb097399de44cdbcf1716c58f67c9116f2bb816 Mon Sep 17 00:00:00 2001 From: "Sara A. Miskovich" Date: Mon, 24 Jun 2024 22:17:09 -0700 Subject: [PATCH] add basic usage demo, clean up nrel module --- .../marimo_notebooks/nrel_buildstock_demo.py | 229 ++++++++++++++++++ examples/marimo_notebooks/resstock_demo.py | 175 ------------- sg2t/io/loadshapes/nrel/nbs.py | 165 ++----------- 3 files changed, 254 insertions(+), 315 deletions(-) create mode 100644 examples/marimo_notebooks/nrel_buildstock_demo.py delete mode 100644 examples/marimo_notebooks/resstock_demo.py diff --git a/examples/marimo_notebooks/nrel_buildstock_demo.py b/examples/marimo_notebooks/nrel_buildstock_demo.py new file mode 100644 index 0000000..d998cc5 --- /dev/null +++ b/examples/marimo_notebooks/nrel_buildstock_demo.py @@ -0,0 +1,229 @@ +import marimo + +__generated_with = "0.6.22" +app = marimo.App() + + +@app.cell +def __(mo): + mo.md("# Accessing NREL's ResStock and ComStock Databases") + return + + +@app.cell +def __(): + #import os, sys + import marimo as mo + #import pandas as pd + import matplotlib.pyplot as plt + + from sg2t.io.loadshapes.nrel.nbs import BuildStock, API + from sg2t.io.loadshapes.nrel.naming import BUILDING_TYPES, HOME_TYPES + return API, BUILDING_TYPES, BuildStock, HOME_TYPES, mo, plt + + +@app.cell +def __(mo): + mo.md("The NREL Building Stock (NBS) `sg2t` module has two classes that deal with the data. The first one is the `API` class to access the data from the S3 bucket. The second is the `BuildStock` class that includes methods for cleaning and post-processing the data (mostly to format it to the `sg2t` standard).") + return + + +@app.cell +def __(mo): + mo.md("## Step 1: Pull the data using the API class") + return + + +@app.cell +def __(mo): + mo.md( + """ + Currently, the API class (under `sg2t.io.loadshapes.nrel.nbs.API`) provides access to the ResStock and ComStock timeseries aggregates by state, county (for 2021 release year), and climate (Building America and IECC zones). + + You can access each with the following methods: + + ```python + api = API() + sector = "resstock" + btype = "single-family_detached" + + # by state + state = "CA" + df = api.get_data_by_state(sector, by=state, type=btype) + + # by county + county = "Alameda" + df = api.get_data_by_county(sector, by=state, type=btype, county_name=county) + + # By climate (Building America) + climate = "hot-dry" + df = api.get_data_by_climate_ba(sector, by=climate, type=btype) + + # or for IECC + climate = "1A" + df = api.get_data_by_climate_iecc(sector, by=climate, type=btype) + ``` + """ + ) + return + + +@app.cell +def __(mo): + mo.md("The home and building types for both sectors are built into the `sg2t.io.loadshapes.nrel.naming` module as \"HOME_TYPES\" and \"BUILDING_TYPES\", respectively.") + return + + +@app.cell +def __(BUILDING_TYPES, HOME_TYPES): + print("Residential home types: ", HOME_TYPES) + print("Commercial building types: ", BUILDING_TYPES) + return + + +@app.cell +def __(mo): + mo.md( + """ + Working Example + + ```python + # Create an API instance + api = API() + + # Configuration + metadata = { + "sector" : "Resstock", + "state" : "CA", + "county_name" : "Alameda", + "building_type" : HOME_TYPES[1] + } + + # Pull data + # Needs: + # - By state: sector, state and building type + # - By county: sector, state and county and building type + # - By climate: sector, climate and building type + dataset = api.get_data_by_county(**metadata) + + # Raw NREL data + dataset.head(3) + ``` + """ + ) + return + + +@app.cell +def __(API, HOME_TYPES): + # Create an API instance + api = API() + + # Configuration + metadata = { + "sector" : "Resstock", + "state" : "CA", + "county_name" : "Alameda", + "building_type" : HOME_TYPES[1] + } + + # Pull data + # Needs: + # - By state: sector, state and building type + # - By county: sector, state and county and building type + # - By climate: sector, climate and building type + dataset = api.get_data_by_county(**metadata) + return api, dataset, metadata + + +@app.cell +def __(dataset): + # Raw NREL data + dataset.head(3) + return + + +@app.cell +def __(mo): + mo.md("## Step 2: Using the BuildStock class") + return + + +@app.cell +def __(mo): + mo.md("This step is for when you'd like to do some data cleaning/analysis in `sg2t` (or elsewhere), e.g., normalization, cleaner column names.") + return + + +@app.cell +def __(mo): + mo.md( + """ + ### Example + ```python + blds_dataset = BuildStock(data=dataset, metadata=metadata) + + # To normalize by square footage + blds_dataset.normalize_by_sqft(); + ``` + """ + ) + return + + +@app.cell +def __(BuildStock, dataset, metadata): + blds_dataset = BuildStock(data=dataset, metadata=metadata) + + # To normalize by square footage + blds_dataset.normalize_by_sqft(); + + blds_dataset.data_normalized.head(3) + return blds_dataset, + + +@app.cell +def __(mo): + mo.md( + """ + To plot the energy consumption (raw) + ```python + blds_dataset.data.plot(y=["out.site_energy.total.energy_consumption"]) + ``` + """ + ) + return + + +@app.cell +def __(blds_dataset): + blds_dataset.data.plot(y=["out.site_energy.total.energy_consumption"]) + return + + +@app.cell +def __(mo): + mo.md( + """ + To plot the normalized energy consumption + ```python + blds_dataset.data_normalized.plot(y=["out.site_energy.total.energy_consumption[kW/sf]"]) + ``` + """ + ) + return + + +@app.cell +def __(blds_dataset): + blds_dataset.data_normalized.plot(y=["out.site_energy.total.energy_consumption[kW/sf]"]) + return + + +@app.cell +def __(): + return + + +if __name__ == "__main__": + app.run() diff --git a/examples/marimo_notebooks/resstock_demo.py b/examples/marimo_notebooks/resstock_demo.py deleted file mode 100644 index ec3d56a..0000000 --- a/examples/marimo_notebooks/resstock_demo.py +++ /dev/null @@ -1,175 +0,0 @@ -import marimo - -__generated_with = "0.2.13" -app = marimo.App() - - -@app.cell -def __(): - # Requirements - import os, sys - import marimo as mo - import pandas as pd - import matplotlib.pyplot as plt - - from sg2t.io.loadshapes.nrel.nbs import BuildStock, API - return API, BuildStock, mo, os, pd, plt, sys - - -@app.cell -def __(): - from sg2t.io.loadshapes.nrel.naming import BUILDING_TYPES, HOME_TYPES - return BUILDING_TYPES, HOME_TYPES - - -@app.cell -def __(BUILDING_TYPES): - BUILDING_TYPES - return - - -@app.cell -def __(): - # Configuration - metadata = { - "sector" : "ResStock", - "state" : "CA", - "county_name" : "Alameda", - "building_type" : "single-family_detached" - } - return metadata, - - -@app.cell -def __(API): - ""# Create an API instance - api = API() - return api, - - -@app.cell -def __(api, metadata): - # Pull data first - # Needs: - # - By state: sector, state and building type - # - By county: sector, state and county and building type - # - By climate: sector, climate and building type - dataset = api.get_data_by_county(**metadata) - return dataset, - - -@app.cell -def __(BuildStock, dataset, metadata): - res = BuildStock(data=dataset, metadata=metadata) # instantiate with dataframe with index as dt timestamp - return res, - - -@app.cell -def __(res): - res.data.head(1) - return - - -@app.cell -def __(res): - res.normalize_by_sqft() # can only do for county data (for now?) - return - - -@app.cell -def __(res): - res.data.plot(y=["out.site_energy.total.energy_consumption"]) - return - - -@app.cell -def __(res): - res.data_normalized.plot(y=["out.site_energy.total.energy_consumption[kW/sf]"]) - return - - -@app.cell -def __(): - # by climate - return - - -@app.cell -def __(): - metadata_com = { - "sector" : "comstock", - "climate" : "hot-dry", - "building_type" : "largehotel" - } - - # metadata_res = { - # "sector" : "ResStock", - # "climate" : "hot-dry", - # "building_type" : "single-family_detached" - # } - return metadata_com, - - -@app.cell -def __(api, metadata_com): - data_com_cli = api.get_data_by_climate_ba(**metadata_com) - return data_com_cli, - - -@app.cell -def __(data_com_cli): - data_com_cli - return - - -@app.cell -def __(BuildStock, data_com_cli, metadata_com): - com = BuildStock(data=data_com_cli, metadata=metadata_com) - return com, - - -@app.cell -def __(com): - com.normalize_by_sqft() - return - - -@app.cell -def __(): - metadata_test = { - "sector" : "comstock", - "building_type" : "largehotel" - } - return metadata_test, - - -@app.cell -def __(BuildStock, data_com_cli, metadata_test): - com_test = BuildStock(data=data_com_cli, metadata=metadata_test) - return com_test, - - -@app.cell -def __(): - # the way I'm planning to do it doesn't work because I can't pass a new kwarg to get data now, can I? or maybe I can make it a new kwargs... ugh - - # doesn't make sense to have incompatible climate, but i either ask users to fix it by setting up a check (either county/state or climate in there) or I change the system - return - - -app._unparsable_cell( - r""" - # by state - com_state_meta = { - \"sector\" : \"comstock\", - \"state\" : \"MI\", - \"building_type\" : \"largehotel\" - } - com_state = - """, - name="__" -) - - -if __name__ == "__main__": - app.run() diff --git a/sg2t/io/loadshapes/nrel/nbs.py b/sg2t/io/loadshapes/nrel/nbs.py index f6c19ac..95a7eb9 100644 --- a/sg2t/io/loadshapes/nrel/nbs.py +++ b/sg2t/io/loadshapes/nrel/nbs.py @@ -24,79 +24,21 @@ class BuildStock(IOBase): dataset into sg2t tools. """ def __init__(self, - data, # TODO: update docstrings - metadata, # TODO: implement or remove - api=None, - config_name="config.ini", # TODO: implement or remove - config_key="io.nrel.api", # TODO: implement or remove + data, + metadata, + api=None ): """ ResStock object initialization. - - Parameters - ---------- - config_name : str - Name of configuration file in sg2t.config, optional. - - config_key : str - Key in config corresponding to this class, required if - config_name is given. - - metadata_file : str - Full path to JSON file containing the metadata for this - type of data. + # TODO: update docstrings """ - # TODO: drop base class? - # super().__init__(config_name, config_key, metadata_file) self.raw_data = data - # self.kwargs = kwargs - # self.weather_gisjoint = self.load_weather_location() self.data = self._format_data() self.data_normalized = None self.metadata = metadata self.api = api - self.validate_metadata() - - # def load_weather_location(self): - # # TODO: check that metadata exists - # if not self.metadata: - # return "None" - # try: - # gisj_metadata = self.metadata["file"]["GISJOINT ID"] - # return gisj_metadata - # except KeyError: - # return "None" - - def validate_metadata(self): - # TODO: also check that there's no overlap? (e.g. both county and climate keys are there) - try: - assert "sector" in self.metadata - assert ("state" in self.metadata) or \ - ("state" in self.metadata and "county" in self.metadata ) or \ - ("climate" in self.metadata) - except AssertionError: - print("Please specify the sector in the metadata, and: \n \ - - the state, or \n \ - - the state and county, or \n \ - - the climate") + self._validate_metadata() def _format_data(self): - """Changes the format of the loaded tmy3 data self.data to follow - a standard format with standard column names. See `mapping.py`. - - This only reorders the columns, putting required ones first, and others - next, and removes redundant/unused columns. - """ - # self.keys_map = get_map(self.metadata_file) - # # Save original dataframe - # raw_data = self.data - # # Create new dataframe - # cols = list(self.keys_map.keys()) - # data = pd.DataFrame(columns=cols) - # for key in list(self.keys_map.keys()): - # print(key, self.keys_map[key]) - # data[key] = raw_data[self.keys_map[key]] - # - # self.data = data self.data = self.raw_data.copy() @@ -115,6 +57,20 @@ def _format_data(self): return self.data + def _validate_metadata(self): + # TODO: do I actually need this + keys_set = set(self.metadata.keys()) + try: + assert keys_set == {'sector', 'state', 'building_type'} or \ + keys_set == {'sector', 'state', 'county_name', 'building_type'} or \ + keys_set == {'sector', 'climate', 'building_type'} + + except AssertionError: + print("Please specify the sector in the metadata, and: \n \ + - the state, or \n \ + - the state and county, or \n \ + - the climate") + def normalize_by_sqft(self): """ Normalize county-level data by square footage and return energy/SF for each building type @@ -122,10 +78,12 @@ def normalize_by_sqft(self): if "county" not in self.data.columns: raise Exception("Must have county level data specified to call this method.") - # create API object # TODO: maybe change how this is implemented + # create API object + # TODO: maybe change how this is implemented + # TODO: maybe can pass metadata if cached to speed it up self.api = self.api if self.api else API() - # TODO: why does this take 17s? + # TODO: speed up (takes 17s) # get SF per build type meta = self.api.get_metadata(self.metadata["sector"]) area = meta.groupby(["county", "building_type"]).sum() @@ -145,7 +103,7 @@ def normalize_by_sqft(self): self.data_normalized.set_index(["county", "building_type"], inplace=True) self.data_normalized = self.data_normalized.join(area) - dt = 0.25 # this shouldn't change for ResStock and ComStock, TODO: confirm this + dt = 0.25 # same for ResStock and ComStock columns = [] for column in self.data_normalized.columns: if column.endswith("consumption"): @@ -229,25 +187,9 @@ class API: AWS S# bucket. https://data.openei.org/submissions/4520 """ - def __init__(self, - # source: str, - config_name="config.ini", - config_key="io.nrel.api" - ): + def __init__(self): """ API object initialization - - Parameters - ---------- - source: str - Desired source of data to pull. Currently supports: ResStock, ComStock. - - config_name : str - Name of configuration file in sg2t.config or cache directory to obtain API path settings. """ - self.source = None - self.config_name = config_name - self.config_key = config_key - self.config = self.load_config(self.config_name, self.config_key) # API paths # 2021 release has county breakdown # 2021 release does *not* take upgrades as input @@ -268,63 +210,6 @@ def __init__(self, # Geographic information self.df_geoinfo = self.get_geoinfo() - # # API options - # self.api_options = { - # "resstock" : - # { "state" : self.get_data_resstock_by_state, # state, hometype - # "county" : self.get_data_resstock_by_county, # state, county, hometype - # "climate-ba" : self.get_data_resstock_by_climatezone, # climate, hometype - # "climate-iecc" : self.get_data_resstock_by_climatezone_iecc, # climate, hometype - # }, - # - # "comstock" : - # { "state" : self.get_data_comstock_by_state, # state, hometype - # "county" : self.get_data_comstock_by_county, # state, county, hometype - # "climate-ba" : self.get_data_comstock_by_climatezone, # climate, hometype - # "climate-iecc" : self.get_data_comstock_by_climatezone_iecc, # climate, hometype - # }, - # } - - # def get_data(self, sector, building_type, state=None, county=None, climate=None): - # sector = sector.lower() - # - # if (state and climate) or (county and climate): - # # if state and county then county level is taken - # raise "Please specify the query type (state, state/county, or climate)." - # - # # Get dataframe - # if state: - # if county: - # return self.api_options[sector]["county"](state=state, county_name=county, building_type=building_type) - # else: - # return self.api_options[sector]["state"](state=state, building_type=building_type) - # elif climate: - # if climate in self.climate_zones_ba: - # return self.api_options[sector]["climate-ba"](climate=climate, building_type=building_type) - # elif climate in self.climate_zones_iecc: - # return self.api_options[sector]["climate-iecc"](climate=climate, building_type=building_type) - # else: - # raise Exception("Invalid option. Please pass either state, county, or climate info.") - - def load_config(self, config_name=None, key=None): - """Load configuration. - - PARAMETERS - ---------- - config_name : str - Name of configuration file in sg2t.config, optional. - - key : str - Key in config corresponding to this class, required if - config_name is given. - - RETURNS - ------- - config : dict - Configuration dict, if any, otherwise None. - """ - return load_config(config_name, key) - def get_geoinfo(self): # This file is identical between ResStock and Comstock sector = "resstock"