From 91a819293523e30d373cf1939d0061d06efe61ce Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Fri, 21 Apr 2023 08:48:22 -0700
Subject: [PATCH 01/49] updates with v0 release data

---
 baus/datasources.py                       | 254 ++++-------------
 baus/models.py                            |  95 +++----
 baus/postprocessing.py                    |  12 +-
 baus/preprocessing.py                     |  70 +----
 baus/subsidies.py                         |  58 ++--
 baus/summaries.py                         | 330 +++++++++-------------
 baus/utils.py                             |  18 --
 baus/variables.py                         |  86 +++---
 configs/developer/developer_settings.yaml | 106 ++++---
 configs/mapping.yaml                      | 143 ----------
 configs/paths.yaml                        |  13 -
 11 files changed, 348 insertions(+), 837 deletions(-)
 delete mode 100644 configs/mapping.yaml
 delete mode 100644 configs/paths.yaml

diff --git a/baus/datasources.py b/baus/datasources.py
index 037ce44e1..3d687b282 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -8,7 +8,6 @@
 from urbansim.utils import misc
 import orca
 from baus import preprocessing
-from baus.utils import geom_id_to_parcel_id, parcel_id_to_geom_id
 from baus.utils import nearest_neighbor
 import yaml
 
@@ -265,18 +264,13 @@ def baseyear_taz_controls():
 
 @orca.table(cache=True)
 def base_year_summary_taz(mapping):
-    df = pd.read_csv(os.path.join('output',
-                                  'baseyear_taz_summaries_2010.csv'),
-                     dtype={'taz1454': np.int64},
-                     index_col="zone_id")
-    cmap = mapping["county_id_tm_map"]
-    df['COUNTY_NAME'] = df.COUNTY.map(cmap)
+    df = pd.read_csv(os.path.join('output', 'baseyear_taz_summaries_2010.csv'), dtype={'taz1454': np.int64}, index_col="taz_tm1")
     return df
 
 
 # non-residential rent data
 @orca.table(cache=True)
-def costar(store, parcels):
+def costar(parcels):
     df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'basis_inputs/parcels_buildings_agents/2015_08_29_costar.csv'))
 
     df["PropertyType"] = df.PropertyType.replace("General Retail", "Retail")
@@ -294,26 +288,8 @@
 
 
 @orca.table(cache=True)
-def zoning_lookup():
-
-    file = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/2020_11_05_zoning_lookup_hybrid_pba50.csv")
-    print('Version of zoning_lookup: {}'.format(file))
-
-    return pd.read_csv(file, dtype={'id': np.int64}, index_col='id')
-
-
-@orca.table(cache=True)
-def zoning_existing(parcels, zoning_lookup):
-
-    file = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/2020_11_05_zoning_parcels_hybrid_pba50.csv")
-    print('Version of zoning_parcels: {}'.format(file))
-
-    df = pd.read_csv(file, dtype={'geom_id': np.int64, 'PARCEL_ID': np.int64, 'zoning_id': np.int64}, index_col="geom_id")
-    df = pd.merge(df, zoning_lookup.to_frame(), left_on="zoning_id", right_index=True)
-
-    df = geom_id_to_parcel_id(df, parcels)
-
-    return df
+def zoning_existing():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/boc_v0.csv"))
 
 
 @orca.table(cache=True)
 def new_tpp_id():
 
 
 @orca.table(cache=True)
@@ -335,20 +311,8 @@ def new_tpp_id():
-def maz():
-    maz = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/maz_geography.csv"),
-                      dtype={'MAZ': np.int64, 'TAZ': np.int64})
-    maz = maz.drop_duplicates('MAZ').set_index('MAZ')
-    taz1454 = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/maz22_taz1454.csv"),
-                          dtype={'maz': np.int64, 'TAZ1454': np.int64}, index_col='maz')
-    maz['taz1454'] = taz1454.TAZ1454
-    return maz
-
-
-@orca.table(cache=True)
-def parcel_to_maz():
-    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/2020_08_17_parcel_to_maz22.csv"),
-                       dtype={'PARCEL_ID': np.int64, 'maz': np.int64}, index_col="PARCEL_ID")
+def travel_model_zones():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0.csv"))
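The v0 loaders above all follow the same read-a-CSV-into-a-cached-orca-table pattern (note the new `zoning_existing`, `parcels`, and `growth_geographies` tables must return DataFrames, not path strings). A minimal sketch of the idiom, with a hypothetical file name, assuming the project's `inputs_dir` injectable is registered:

```python
import os
import orca
import pandas as pd

@orca.table(cache=True)  # cache=True: the CSV is read once per model run
def example_table():
    # hypothetical v0 input; the real tables point at basis_inputs paths as above
    path = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/example_v0.csv")
    return pd.read_csv(path, index_col="geo_id")
```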
 
 
 @orca.table(cache=True)
@@ -413,7 +377,7 @@ def tm1_tm2_maz_forecast_inputs(tm1_tm2_regional_demographic_forecast):
 
 
 @orca.table(cache=True)
-def zoning_strategy(parcels_geography, mapping):
+def zoning_strategy(growth_geographies, mapping):
 
     strategy_zoning = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'plan_strategies/zoning_mods.csv'))
 
@@ -433,22 +397,13 @@ def add_drop_helper(col, val):
 
     join_col = 'zoningmodcat'
     print('join_col of zoningmods is {}'.format(join_col))
 
-    return pd.merge(parcels_geography.to_frame().reset_index(), strategy_zoning, on=join_col, how='left').set_index('parcel_id')
+    return pd.merge(growth_geographies.to_frame().reset_index(), strategy_zoning, on=join_col, how='left').set_index('geo_id')
 
 
 @orca.table(cache=True)
-def parcels(store):
-    df = store['parcels']
-    # add a lat/lon to synthetic parcels to avoid a Pandana error
-    df.loc[2054503, "x"] = -122.1697
-    df.loc[2054503, "y"] = 37.4275
-    df.loc[2054504, "x"] = -122.1697
-    df.loc[2054504, "y"] = 37.4275
-    df.loc[2054505, "x"] = -122.1697
-    df.loc[2054505, "y"] = 37.4275
-    df.loc[2054506, "x"] = -122.1697
-    df.loc[2054506, "y"] = 37.4275
-    return df
+def parcels():
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/parcels_v0.csv"))
+    return df.set_index("geo_id")
 
 
 @orca.table(cache=True)
 def parcels_zoning_calculations(parcels):
     return pd.DataFrame(index=parcels.index)
 
 
-@orca.table()
-def taz(zones):
-    return zones
-
-
 @orca.table(cache=True)
-def parcel_rejections():
-    url = "https://forecast-feedback.firebaseio.com/parcelResults.json"
-    return pd.read_json(url, orient="index").set_index("geomId")
-
-
-@orca.table(cache=True)
-def parcels_geography(parcels):
-
-    file = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/2021_02_25_parcels_geography.csv")
-    print('Versin of parcels_geography: {}'.format(file))
-    df = pd.read_csv(file, dtype={'PARCEL_ID': np.int64, 'geom_id': np.int64, 'jurisdiction_id': np.int64}, index_col="geom_id")
-    df = geom_id_to_parcel_id(df, parcels)
-
-    # this will be used to map juris id to name
-    juris_name = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/census_id_to_name.csv"),
-                             dtype={'census_id': np.int64}, index_col="census_id").name10
-
-    df["juris_name"] = df.jurisdiction_id.map(juris_name)
-
-    df.loc[2054504, "juris_name"] = "Marin County"
-    df.loc[2054505, "juris_name"] = "Santa Clara County"
-    df.loc[2054506, "juris_name"] = "Marin County"
-    df.loc[572927, "juris_name"] = "Contra Costa County"
-
-    # assert no empty juris values
-    assert True not in df.juris_name.isnull().value_counts()
-
-    df["pda_id"] = df.pda_id.str.lower()
-    df["gg_id"] = df.gg_id.str.lower()
-    df["tra_id"] = df.tra_id.str.lower()
-    df['juris_tra'] = df.juris + '-' + df.tra_id
-    df["ppa_id"] = df.ppa_id.str.lower()
-    df['juris_ppa'] = df.juris + '-' + df.ppa_id
-    df["sesit_id"] = df.sesit_id.str.lower()
-    df['juris_sesit'] = df.juris + '-' + df.sesit_id
-
-    df['coc_id'] = df.coc_id.str.lower()
-    df['juris_coc'] = df.juris + '-' + df.coc_id
-
-    return df
+def growth_geographies():
+    # zoning_strategy resets and re-sets this index, so read with geo_id as the index
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0.csv"), index_col="geo_id")
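For reference, the `zoningmodcat` join in `zoning_strategy` above reduces to an ordinary left merge keyed on the concatenated geography code, leaving strategy columns NaN where no zoning mod applies. A toy illustration with invented values:

```python
import pandas as pd

growth_geographies = pd.DataFrame(
    {"zoningmodcat": ["tra1", "tra2"]},
    index=pd.Index([101, 102], name="geo_id"))
strategy_zoning = pd.DataFrame({"zoningmodcat": ["tra1"], "dua_up": [35]})

# one row per geo_id; geo_id 102 gets NaN for dua_up because no mod matches
out = pd.merge(growth_geographies.reset_index(), strategy_zoning,
               on="zoningmodcat", how="left").set_index("geo_id")
```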
 
 
 @orca.table(cache=True)
@@ -570,120 +482,51 @@ def manual_edits():
     return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/manual_edits.csv"))
 
 
-def reprocess_dev_projects(df):
-    # if dev projects with the same parcel id have more than one build
-    # record, we change the later ones to add records - we don't want to
-    # constantly be redeveloping projects, but it's a common error for users
-    # to make in their development project configuration
-    df = df.sort_values(["geom_id", "year_built"])
-    prev_geom_id = None
-    for index, rec in df.iterrows():
-        if rec.geom_id == prev_geom_id:
-            df.loc[index, "action"] = "add"
-        prev_geom_id = rec.geom_id
-
-    return df
-
-
 # shared between demolish and build tables below
-def get_dev_projects_table(parcels):
+def get_dev_projects_table():
     df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"),
-                     "basis_inputs/parcels_buildings_agents/2021_0309_1939_development_projects.csv"),
-                     dtype={'PARCEL_ID': np.int64, 'geom_id': np.int64})
-    df = reprocess_dev_projects(df)
-    orca.add_injectable("devproj_len", len(df))
-
-    df = df.dropna(subset=['geom_id'])
-
-    cnts = df.geom_id.isin(parcels.geom_id).value_counts()
-    if False in cnts.index:
-        print("%d MISSING GEOMIDS!" % cnts.loc[False])
-
-    df = df[df.geom_id.isin(parcels.geom_id)]
-
-    geom_id = df.geom_id  # save for later
-    df = df.set_index("geom_id")
-    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
-    df["geom_id"] = geom_id.values  # add it back again cause it goes away
-    orca.add_injectable("devproj_len_geomid", len(df))
-
+                     "basis_inputs/parcels_buildings_agents/dev_pipeline_v0.csv"))
+    df = df.set_index("geo_id")
     return df
 
 
 @orca.table(cache=True)
-def demolish_events(parcels):
-    df = get_dev_projects_table(parcels)
-
+def demolish_events():
+    df = get_dev_projects_table()
     # keep demolish and build records
+    # build records are used to demolish the existing building on a parcel where a pipeline project is occurring
+    # demolish records simply remove the existing building from a parcel
     return df[df.action.isin(["demolish", "build"])]
 
 
 @orca.table(cache=True)
-def development_projects(parcels, mapping):
-    df = get_dev_projects_table(parcels)
-
-    for col in [
-        'residential_sqft', 'residential_price', 'non_residential_rent']:
-        df[col] = 0
-    df["redfin_sale_year"] = 2012  # default base year
-    df["redfin_sale_price"] = np.nan  # null sales price
-    df["stories"] = df.stories.fillna(1)
-    df["building_sqft"] = df.building_sqft.fillna(0)
-    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)
-    df["residential_units"] = df.residential_units.fillna(0).astype("int")
-    df["preserved_units"] = 0.0
-    df["inclusionary_units"] = 0.0
-    df["subsidized_units"] = 0.0
-
-    df["building_type"] = df.building_type.replace("HP", "OF")
-    df["building_type"] = df.building_type.replace("GV", "OF")
-    df["building_type"] = df.building_type.replace("SC", "OF")
-
-    building_types = mapping["building_type_map"].keys()
-    # only deal with building types we recorgnize
-    # otherwise hedonics break
-    # currently: 'HS', 'HT', 'HM', 'OF', 'HO', 'SC', 'IL',
-    # 'IW', 'IH', 'RS', 'RB', 'MR', 'MT', 'ME', 'PA', 'PA2'
-    df = df[df.building_type.isin(building_types)]
-
-    # we don't predict prices for schools and hotels right now
-    df = df[~df.building_type.isin(["SC", "HO"])]
-
-    # need a year built to get built
-    df = df.dropna(subset=["year_built"])
+def development_projects():
+    df = get_dev_projects_table()
    # keep add and build records, as shown in the sketch below
    # build records construct a new building on a parcel, replacing any existing building
    # add records add a building to a parcel where a building already exists
+    df = df[df.action.isin(["add", "build"])]
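The action codes drive both tables: a pipeline row can clear a parcel (`demolish`), replace what is there (`build`), or densify without clearing (`add`). A toy frame showing how the two filters split the same input (hypothetical data, not part of this patch):

```python
import pandas as pd

pipeline = pd.DataFrame({
    "geo_id": [1, 2, 3],
    "action": ["build", "add", "demolish"],
})

# demolish_events: clears parcels for 'build' projects and drops 'demolish' parcels
demolish_events = pipeline[pipeline.action.isin(["demolish", "build"])]

# development_projects: everything that adds floorspace
development_projects = pipeline[pipeline.action.isin(["add", "build"])]
```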
-    orca.add_injectable("devproj_len_proc", len(df))
-
-    print("Describe of development projects")
     # this makes sure dev projects has all the same columns as buildings
-    # which is the point of this method
+    print("Describe of development projects")
     print(df[orca.get_table('buildings').local_columns].describe())
 
     return df
 
 
-def print_error_if_not_available(store, table):
-    if table not in store:
-        raise Exception(
-            "%s not found in store - you need to preprocess" % table +
-            " the data with:\n  python baus.py --mode preprocessing -c")
-    return store[table]
-
-
 @orca.table(cache=True)
-def jobs(store):
-    return print_error_if_not_available(store, 'jobs_preproc')
+def jobs():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/jobs.csv"))
 
 
 @orca.table(cache=True)
-def households(store):
-    return print_error_if_not_available(store, 'households_preproc')
+def households():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/households.csv"))
 
 
 @orca.table(cache=True)
-def buildings(store):
-    return print_error_if_not_available(store, 'buildings_preproc')
+def buildings():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/buildings.csv"))
 
 
 @orca.table(cache=True)
@@ -777,9 +620,6 @@ def telecommute_sqft_per_job_adjusters():
 def taz_geography(superdistricts_geography, mapping):
     tg = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/taz_geography.csv"),
                      dtype={'zone': np.int64, 'superdistrcit': np.int64, 'county': np.int64}, index_col="zone")
-    cmap = mapping["county_id_tm_map"]
-    tg['county_name'] = tg.county.map(cmap)
-
     # we want "subregion" geography on the taz_geography table
     # we have to go get it from the superdistricts_geography table and join
     # using the superdistrcit id
@@ -789,13 +629,6 @@ def taz_geography(superdistricts_geography, mapping):
     return tg
 
 
-# these are shapes - "zones" in the bay area
-@orca.table(cache=True)
-def zones(store):
-    # sort index so it prints out nicely when we want it to
-    return store['zones'].sort_index()
-
-
 # SLR progression by year
 @orca.table(cache=True)
 def slr_progression():
@@ -866,11 +699,24 @@ def accessory_units():
     return df
 
 
+@orca.table(cache=True)
+def nodev_sites():
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0.csv"), index_col="geo_id")
+    return df
+
+
+@orca.table(cache=True)
+def institutions():
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/institutions.csv"), index_col="geo_id")
+    return df
+
+
 # this specifies the relationships between tables
 orca.broadcast('buildings', 'residential_units', cast_index=True, onto_on='building_id')
 orca.broadcast('residential_units', 'households', cast_index=True, onto_on='unit_id')
-orca.broadcast('parcels_geography', 'buildings', cast_index=True, onto_on='parcel_id')
-orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
-# not defined in urbansim_Defaults
-orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
-orca.broadcast('taz_geography', 'parcels', cast_index=True, onto_on='zone_id')
\ No newline at end of file
+orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='geo_id')
+orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='geo_id')
+# not defined in urbansim_defaults
+orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id') \ No newline at end of file diff --git a/baus/models.py b/baus/models.py index b5dbb3f77..35c9d397d 100644 --- a/baus/models.py +++ b/baus/models.py @@ -14,9 +14,7 @@ from urbansim_defaults import models, utils from baus import datasources, subsidies, summaries, variables -from baus.utils import \ - add_buildings, geom_id_to_parcel_id, groupby_random_choice, \ - parcel_id_to_geom_id, round_series_match_target +from baus.utils import add_buildings, groupby_random_choice, round_series_match_target @orca.step() @@ -45,16 +43,6 @@ def households_transition(households, household_controls, year, transition_reloc return ret -# this is a list of parcel_ids which are to be treated as static -@orca.injectable() -def static_parcels(developer_settings, parcels): - # list of geom_ids to not relocate - static_parcels = developer_settings["static_parcels"] - # geom_ids -> parcel_ids - return geom_id_to_parcel_id( - pd.DataFrame(index=static_parcels), parcels).index.values - - def _proportional_jobs_model( target_ratio, # ratio of jobs of this sector to households sector, # empsix sector @@ -127,11 +115,11 @@ def _proportional_jobs_model( @orca.step() -def accessory_units_strategy(run_setup, year, buildings, parcels, accessory_units): +def accessory_units_strategy(year, buildings, parcels, accessory_units): add_units = accessory_units[str(year)] - buildings_juris = misc.reindex(parcels.juris, buildings.parcel_id) + buildings_juris = misc.reindex(parcels.jurisdiction, buildings.geo_id) res_buildings = buildings_juris[buildings.general_type == "Residential"] add_buildings = groupby_random_choice(res_buildings, add_units) @@ -228,7 +216,7 @@ def jobs_relocation(jobs, employment_relocation_rates, run_setup, employment_rel static_parcels, buildings): # get buildings that are on those parcels - static_buildings = buildings.index[buildings.parcel_id.isin(static_parcels)] + static_buildings = buildings.index[buildings.geo_id.isin(static_parcels)] rates = employment_relocation_rates.local # update the relocation rates with the adjusters if adjusters are being used @@ -258,7 +246,7 @@ def jobs_relocation(jobs, employment_relocation_rates, run_setup, employment_rel def household_relocation(households, household_relocation_rates, run_setup, static_parcels, buildings): # get buildings that are on those parcels - static_buildings = buildings.index[buildings.parcel_id.isin(static_parcels)] + static_buildings = buildings.index[buildings.geo_id.isin(static_parcels)] rates = household_relocation_rates.local # update the relocation rates with the renter protections strategy if applicable @@ -283,19 +271,13 @@ def household_relocation(households, household_relocation_rates, run_setup, stat households.update_col_from_series("building_id", pd.Series(-1, index=index), cast=True) -# this deviates from the step in urbansim_defaults only in how it deals with -# demolished buildings - this version only demolishes when there is a row to -# demolish in the csv file - this also allows building multiple buildings and -# just adding capacity on an existing parcel, by adding one building at a time +# this deviates from the step in urbansim_defaults when there are multiple projects on a parcel: +# instead of redeveloping the parcel each time, it adds each building to the parcel @orca.step() def scheduled_development_events(buildings, development_projects, demolish_events, summary, year, parcels, mapping, years_per_iter, - parcels_geography, 
building_sqft_per_job, static_parcels, base_year, run_setup): + growth_geographies, building_sqft_per_job, static_parcels, base_year, run_setup): # first demolish - # 6/3/20: current approach is to grab projects from the simulation year - # and previous four years, however the base year is treated differently, - # eg 2015 pulls 2015-2010 - # this should be improved in the future so that the base year - # also runs SDEM, eg 2015 pulls 2015-2014, while 2010 pulls 2010 projects + # grab projects from the simulation year and previous four years, except for 2015 which pulls 2015-2010 projects if year == (base_year + years_per_iter): demolish = demolish_events.to_frame().query("%d <= year_built <= %d" % (year - years_per_iter, year)) else: @@ -303,20 +285,16 @@ def scheduled_development_events(buildings, development_projects, demolish_event print("Demolishing/building %d buildings" % len(demolish)) l1 = len(buildings) - buildings = utils._remove_developed_buildings(buildings.to_frame(buildings.local_columns), demolish, + buildings = utils._remove_developed_buildings(buildings.to_frame(buildings.local_columns), demolish, unplace_agents=["households", "jobs"]) - orca.add_injectable('static_parcels', np.append(static_parcels, demolish.loc[demolish.action == 'build', 'parcel_id'])) + + orca.add_injectable('static_parcels', np.append(static_parcels, demolish.loc[demolish.action == 'build', 'geo_id'])) orca.add_table("buildings", buildings) buildings = orca.get_table("buildings") - print("Demolished %d buildings" % (l1 - len(buildings))) - print(" (this number is smaller when parcel has no existing buildings)") + print("Demolished %d buildings on parcels with pipeline projects being built" % (l1 - len(buildings))) # then build - # 6/3/20: current approach is to grab projects from the simulation year - # and previous four years, however the base year is treated differently, - # eg 2015 pulls 2015-2010 - # this should be improved in the future so that the base year - # also runs SDEM, eg 2015 pulls 2015-2014, while 2010 pulls 2010 projects + # grab projects from the simulation year and previous four years, except for 2015 which pulls 2015-2010 projects if year == (base_year + years_per_iter): dps = development_projects.to_frame().query("%d <= year_built <= %d" % (year - years_per_iter, year)) else: @@ -327,14 +305,14 @@ def scheduled_development_events(buildings, development_projects, demolish_event new_buildings = utils.scheduled_development_events(buildings, dps, remove_developed_buildings=False, unplace_agents=['households', 'jobs']) + new_buildings["form"] = new_buildings.building_type.map(mapping['building_type_map']).str.lower() new_buildings["job_spaces"] = new_buildings.non_residential_sqft / new_buildings.building_type.fillna("OF").map(building_sqft_per_job) new_buildings["job_spaces"] = new_buildings.job_spaces.fillna(0).astype('int') - new_buildings["geom_id"] = parcel_id_to_geom_id(new_buildings.parcel_id) new_buildings["SDEM"] = True new_buildings["subsidized"] = False - new_buildings["zone_id"] = misc.reindex(parcels.zone_id, new_buildings.parcel_id) + new_buildings["zone_id"] = misc.reindex(parcels.zone_id, new_buildings.geo_id) if run_setup['run_vmt_fee_res_for_res_strategy'] or ["run_sb743_strategy"]: vmt_fee_categories = orca.get_table("vmt_fee_categories") new_buildings["vmt_res_cat"] = misc.reindex(vmt_fee_categories.res_cat, new_buildings.zone_id) @@ -343,15 +321,15 @@ def scheduled_development_events(buildings, development_projects, demolish_event new_buildings["vmt_nonres_cat"] = 
misc.reindex(vmt_fee_categories.nonres_cat, new_buildings.zone_id) del new_buildings["zone_id"] - new_buildings["pda_id"] = parcels_geography.pda_id.loc[new_buildings.parcel_id].values - new_buildings["tra_id"] = parcels_geography.tra_id.loc[new_buildings.parcel_id].values - new_buildings["ppa_id"] = parcels_geography.ppa_id.loc[new_buildings.parcel_id].values - new_buildings["sesit_id"] = parcels_geography.sesit_id.loc[new_buildings.parcel_id].values - new_buildings["coc_id"] = parcels_geography.coc_id.loc[new_buildings.parcel_id].values - new_buildings["juris_tra"] = parcels_geography.juris_tra.loc[new_buildings.parcel_id].values - new_buildings["juris_ppa"] = parcels_geography.juris_ppa.loc[new_buildings.parcel_id].values - new_buildings["juris_sesit"] = parcels_geography.juris_sesit.loc[new_buildings.parcel_id].values - new_buildings["juris_coc"] = parcels_geography.juris_coc.loc[new_buildings.parcel_id].values + new_buildings["pda_id"] = growth_geographies.pda_id.loc[new_buildings.geo_id].values + new_buildings["tra_id"] = growth_geographies.tra_id.loc[new_buildings.geo_id].values + new_buildings["ppa_id"] = growth_geographies.ppa_id.loc[new_buildings.geo_id].values + new_buildings["sesit_id"] = growth_geographies.sesit_id.loc[new_buildings.geo_id].values + new_buildings["coc_id"] = growth_geographies.coc_id.loc[new_buildings.geo_id].values + new_buildings["juris_tra"] = growth_geographies.juris_tra.loc[new_buildings.geo_id].values + new_buildings["juris_ppa"] = growth_geographies.juris_ppa.loc[new_buildings.geo_id].values + new_buildings["juris_sesit"] = growth_geographies.juris_sesit.loc[new_buildings.geo_id].values + new_buildings["juris_coc"] = growth_geographies.juris_coc.loc[new_buildings.geo_id].values summary.add_parcel_output(new_buildings) @@ -423,7 +401,7 @@ def add_extra_columns_func(df): if "parcel_size" not in df: df["parcel_size"] = \ - orca.get_table("parcels").parcel_size.loc[df.parcel_id] + orca.get_table("parcels").parcel_size.loc[df.geo_id] if orca.is_injectable("year") and "year_built" not in df: df["year_built"] = orca.get_injectable("year") @@ -647,8 +625,7 @@ def retail_developer(jobs, buildings, parcels, nodes, feasibility, target -= d.non_residential_sqft # add redeveloped sqft to target - filt = "general_type == 'Retail' and parcel_id == %d" % \ - d["parcel_id"] + filt = "general_type == 'Retail' and geo_id == %d" % d["geo_id"] target += bldgs.query(filt).non_residential_sqft.sum() devs.append(d) @@ -822,7 +799,7 @@ def developer_reprocess(buildings, year, years_per_iter, jobs, print("Attempting to add ground floor retail to %d devs" % len(new_buildings)) retail = parcel_is_allowed_func("retail") - new_buildings = new_buildings[retail.loc[new_buildings.parcel_id].values] + new_buildings = new_buildings[retail.loc[new_buildings.geo_id].values] print("Disallowing dev on these parcels:") print(" %d devs left after retail disallowed" % len(new_buildings)) @@ -844,7 +821,7 @@ def developer_reprocess(buildings, year, years_per_iter, jobs, # retail in areas that are underserved right now - this is defined as # the location where the retail ratio (ratio of income to retail sqft) # is greater than the median - ratio = parcels.retail_ratio.loc[new_buildings.parcel_id] + ratio = parcels.retail_ratio.loc[new_buildings.geo_id] new_buildings = new_buildings[ratio.values > ratio.median()] print("Adding %d sqft of ground floor retail in %d locations" % @@ -871,7 +848,7 @@ def developer_reprocess(buildings, year, years_per_iter, jobs, sqft_by_gtype / 1000000.0) -def 
proportional_job_allocation(parcel_id):
+def proportional_job_allocation(geo_id):
     # this method takes a parcel and increases the number of jobs on the
     # parcel in proportion to the ratio of sectors that existed in the base yr
     # this is because elcms can't get the distribution right in some cases, eg
     # institutions and not subject to the market
 
     # get buildings on this parcel
-    buildings = orca.get_table("buildings").to_frame(
-        ["parcel_id", "job_spaces", "zone_id", "year_built"]).\
-        query("parcel_id == %d" % parcel_id)
+    buildings = orca.get_table("buildings").to_frame(["geo_id", "job_spaces", "zone_id", "year_built"]).\
+        query("geo_id == %d" % geo_id)
 
     # get jobs in those buildings
     all_jobs = orca.get_table("jobs").local
@@ -906,8 +882,7 @@
     # make sure index is incrementing
     new_jobs.index = new_jobs.index + 1 + np.max(all_jobs.index.values)
 
-    print("Adding {} new jobs to parcel {} with proportional model".format(
-        num_new_jobs, parcel_id))
+    print("Adding {} new jobs to parcel {} with proportional model".format(num_new_jobs, geo_id))
     print(new_jobs.head())
     all_jobs = all_jobs.append(new_jobs)
     orca.add_table("jobs", all_jobs)
 
 
 @orca.step()
 def static_parcel_proportional_job_allocation(static_parcels):
-    for parcel_id in static_parcels:
-        proportional_job_allocation(parcel_id)
+    for geo_id in static_parcels:
+        proportional_job_allocation(geo_id)
 
 
 def make_network(name, weight_col, max_distance):
diff --git a/baus/postprocessing.py b/baus/postprocessing.py
index 647c5a4f6..d359f479d 100644
--- a/baus/postprocessing.py
+++ b/baus/postprocessing.py
@@ -532,13 +532,13 @@ def GEO_SUMMARY_LOADER(run_num, geo, parcel_baseyear, parcel_endyear):
 
     zoningtag = 'zoningmodcat'
 
-    parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
+    parcel_baseyear = parcel_baseyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
                                        'residential_units','deed_restricted_units', 'inclusionary_units',
                                        'subsidized_units','preserved_units']]
-    parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
+    parcel_endyear = parcel_endyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
                                      'residential_units','deed_restricted_units','inclusionary_units',
                                      'subsidized_units','preserved_units','juris',zoningtag]]
-    parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0)
+    parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'geo_id', how = 'left').fillna(0)
     if 0 in parcel_data.juris.values:
         dropindex = parcel_data[parcel_data['juris'] == 0].index
         parcel_data.drop(dropindex,inplace = True)
@@ -563,13 +563,13 @@ def TWO_GEO_SUMMARY_LOADER(run_num, geo1, geo2, parcel_baseyear, parcel_endyear):
 
     zoningtag = 'zoningmodcat'
 
-    parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
+    parcel_baseyear = parcel_baseyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
                                        'residential_units','deed_restricted_units', 'inclusionary_units',
                                        'subsidized_units', 'preserved_units']]
-    parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
+    parcel_endyear = parcel_endyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4',
                                      'residential_units','deed_restricted_units','inclusionary_units',
                                      'subsidized_units','preserved_units','juris',zoningtag]]
-    parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0)
+    parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'geo_id', how = 'left').fillna(0)
     if 0 in parcel_data.juris.values:
         dropindex = parcel_data[parcel_data['juris'] == 0].index
         parcel_data.drop(dropindex,inplace = True)
diff --git a/baus/preprocessing.py b/baus/preprocessing.py
index 111dd438d..77d4ee98e 100644
--- a/baus/preprocessing.py
+++ b/baus/preprocessing.py
@@ -279,84 +279,16 @@ def correct_baseyear_vacancies(buildings, parcels, jobs, store):
 
 @orca.step()
 def preproc_buildings(store, parcels, manual_edits):
-    # start with buildings from urbansim_defaults
-    df = store['buildings']
-
-    # add source of buildings data (vs pipeline, developer model)
-    df['source'] = 'h5_inputs'
-
-    # this is code from urbansim_defaults
-    df["residential_units"] = pd.concat(
-        [df.residential_units,
-         store.households_preproc.building_id.value_counts()],
-        axis=1).max(axis=1)
-
-    df["preserved_units"] = 0.0
-    df["inclusionary_units"] = 0.0
-    df["subsidized_units"] = 0.0
-
-    # XXX need to make sure jobs don't exceed capacity
-
-    # drop columns we don't needed
     df = df.drop(['development_type_id', 'improvement_value',
                   'sqft_per_unit', 'nonres_rent_per_sqft',
                   'res_price_per_sqft',
                   'redfin_home_type', 'costar_property_type',
                   'costar_rent'], axis=1)
 
-    # apply manual edits
-    edits = manual_edits.local
-    edits = edits[edits.table == 'buildings']
-    for index, row, col, val in \
-            edits[["id", "attribute", "new_value"]].itertuples():
-        df.set_value(row, col, val)
-
-    df["residential_units"] = df.residential_units.fillna(0)
-
-    # for some reason nonres can be more than total sqft
-    df["building_sqft"] = pd.DataFrame({
-        "one": df.building_sqft,
-        "two": df.residential_sqft + df.non_residential_sqft}).max(axis=1)
-
-    df["building_type"] = df.building_type_id.map({
-        0: "O",
-        1: "HS",
-        2: "HT",
-        3: "HM",
-        4: "OF",
-        5: "HO",
-        6: "SC",
-        7: "IL",
-        8: "IW",
-        9: "IH",
-        10: "RS",
-        11: "RB",
-        12: "MR",
-        13: "MT",
-        14: "ME",
-        15: "PA",
-        16: "PA2"
-    })
-
-    del df["building_type_id"]  # we won't use building type ids anymore
-
-    # keeps parking lots from getting redeveloped
-    df["building_sqft"][df.building_type.isin(["PA", "PA2"])] = 0
-    df["non_residential_sqft"][df.building_type.isin(["PA", "PA2"])] = 0
-
-    # don't know what an other building type id, set to office
-    df["building_type"] = df.building_type.replace("O", "OF")
-
     # set default redfin sale year to 2012
     df["redfin_sale_year"] = df.redfin_sale_year.fillna(2012)
 
-    df["residential_price"] = 0.0
-    df["non_residential_rent"] = 0.0
-
-    df = assign_deed_restricted_units(df, parcels)
-
-    store['buildings_preproc'] = df
-
     # this runs after the others because it needs access to orca-assigned
     # columns - in particular is needs access to the non-residential sqft and
     # job spaces columns
diff --git a/baus/subsidies.py b/baus/subsidies.py
index 201893687..ea28c3214 100644
--- a/baus/subsidies.py
+++ b/baus/subsidies.py
@@ -55,17 +55,17 @@ def coffer(account_strategies):
 
 
 @orca.step()
-def preserve_affordable(year, base_year, preservation, residential_units, taz_geography, buildings, parcels_geography):
+def preserve_affordable(preservation, residential_units, travel_model_zones, buildings, growth_geographies):
 
     # join several geography columns to units table so that we can apply units
     res_units = residential_units.to_frame()
     bldgs = buildings.to_frame()
-    parcels_geog = parcels_geography.to_frame()
-    taz_geog = taz_geography.to_frame()
+    growth_geogs = growth_geographies.to_frame()
+    tm_geogs = travel_model_zones.to_frame()
 
-    res_units = res_units.merge(bldgs[['parcel_id']], left_on='building_id', right_index=True, how='left').\
-        merge(parcels_geog[['gg_id', 'sesit_id', 'tra_id', 'juris']], left_on='parcel_id', right_index=True, how='left').\
-        merge(taz_geog, left_on='zone_id', right_index=True, how='left')
+    res_units = res_units.merge(bldgs[['geo_id']], left_on='building_id', right_index=True, how='left').\
+        merge(growth_geogs[['gg_id', 'sesit_id', 'tra_id', 'juris']], left_on='geo_id', right_index=True, how='left').\
+        merge(tm_geogs, left_on='taz_tm1', right_index=True, how='left')
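The chained merge above is the patch's core join pattern: units pick up a `geo_id` through their building, then the growth-geography flags through `geo_id`. In isolation, with toy data (hypothetical ids and columns):

```python
import pandas as pd

units = pd.DataFrame({"building_id": [10, 11]}, index=pd.Index([0, 1], name="unit_id"))
bldgs = pd.DataFrame({"geo_id": [100, 200]}, index=pd.Index([10, 11], name="building_id"))
geogs = pd.DataFrame({"tra_id": ["tra1", None]}, index=pd.Index([100, 200], name="geo_id"))

# units -> buildings (for geo_id) -> growth geographies (for the flags)
out = (units.merge(bldgs, left_on="building_id", right_index=True, how="left")
            .merge(geogs, left_on="geo_id", right_index=True, how="left"))
```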
 
     s = preservation["housing_preservation"]["settings"]
 
@@ -356,13 +356,12 @@ def policy_modifications_of_profit(feasibility, parcels):
 
 
 @orca.step()
-def calculate_vmt_fees(run_setup, account_strategies, year, buildings, coffer, summary, years_per_iter):
+def calculate_vmt_fees(run_setup, account_strategies, year, coffer, summary, years_per_iter):
 
     vmt_settings = account_strategies["acct_settings"]["vmt_settings"]
 
     # this is the frame that knows which devs are subsidized
     df = summary.parcel_output
-
     # grabs projects in the simulation period that are not subsidized
     df = df.query("%d <= year_built < %d and subsidized != True" % (year, year + years_per_iter))
 
@@ -372,18 +371,13 @@
     print("%d projects pass the vmt filter" % len(df))
 
     total_fees = 0
     if run_setup["run_vmt_fee_res_for_res_strategy"]:
         # maps the vmt fee amounts designated in the policy settings to the projects based on their categorized vmt levels
         df["res_for_res_fees"] = df.vmt_res_cat.map(vmt_settings["res_for_res_fee_amounts"])
         total_fees += (df.res_for_res_fees * df.residential_units).sum()
         print("Applying vmt fees to %d units" % df.residential_units.sum())
 
     if run_setup["run_vmt_fee_com_for_res_strategy"]:
         df["com_for_res_fees"] = df.vmt_nonres_cat.map(vmt_settings["com_for_res_fee_amounts"])
         total_fees += (df.com_for_res_fees * df.non_residential_sqft).sum()
         print("Applying vmt fees to %d commercial sqft" % df.non_residential_sqft.sum())
 
@@ -397,21 +391,11 @@
     total_fees = 0
 
     if run_setup["run_vmt_fee_com_for_com_strategy"]:
-        # assign fees by county
-        # assign county to parcels
-        county_lookup = orca.get_table("parcels_subzone").to_frame()
-        county_lookup = county_lookup[["county"]].rename(columns={'county': 'county3'})
-        county_lookup.reset_index(inplace=True)
-        county_lookup = county_lookup.rename(columns={'PARCEL_ID': 'PARCELID'})
-        df = df.merge(county_lookup, left_on='parcel_id', right_on='PARCELID', how='left')
-
+        # this step is only needed if summary_output doesn't get a county column;
+        # parcels is indexed by geo_id, so join the county column on the index
+        df = df.merge(orca.get_table("parcels").to_frame(['county']), left_on='geo_id', right_index=True, how='left')
         # assign fee to parcels based on county
-        counties3 = ['ala', 'cnc', 'mar', 'nap', 'scl', 'sfr', 'smt', 'sol', 'son']
-        counties = ['alameda', 'contra_costa', 'marin', 'napa', 'santa_clara', 'san_francisco', 'san_mateo', 'solano', 'sonoma']
-        for county3, county in zip(counties3, counties):
-            df.loc[df["county3"] == county3, "com_for_com_fees"] = df.vmt_nonres_cat.map(vmt_settings["com_for_com_fee_amounts"][county])
+        for county in df.county.unique():
+            df.loc[df["county"] == county, "com_for_com_fees"] = df.vmt_nonres_cat.map(vmt_settings["com_for_com_fee_amounts"][county])
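The per-county loop boils down to `Series.map` over the categorized VMT level, with a different fee schedule per county. A small worked example with invented fee amounts:

```python
import pandas as pd

fee_amounts = {"alameda": {"high": 10, "medium": 5},
               "napa": {"high": 8, "medium": 4}}
df = pd.DataFrame({"county": ["alameda", "napa"],
                   "vmt_nonres_cat": ["high", "medium"],
                   "non_residential_sqft": [1000, 500]})

for county in df.county.unique():
    # map this county's fee schedule, but only write it onto that county's rows
    df.loc[df.county == county, "com_for_com_fees"] = \
        df.vmt_nonres_cat.map(fee_amounts[county])

total_fees = (df.com_for_com_fees * df.non_residential_sqft).sum()  # 10*1000 + 4*500
```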
         total_fees += (df.com_for_com_fees * df.non_residential_sqft).sum()
         print("Applying vmt fees to %d commercial sqft" % df.non_residential_sqft.sum())
 
@@ -437,17 +421,11 @@ def calculate_jobs_housing_fees(account_strategies, year, coffer, summary, years_per_iter):
 
     for key, acct in jobs_housing_settings.items():
 
-        # assign jurisdiction to parcels
-        juris_lookup = orca.get_table("parcels_geography").to_frame()
-        juris_lookup = juris_lookup[['PARCEL_ID', 'juris_name']].rename(columns={'PARCEL_ID': 'PARCELID', 'juris_name': 'jurisname'})
-
-        county_lookup = orca.get_table("parcels_subzone").to_frame().reset_index()
-        county_lookup = county_lookup[['PARCEL_ID', 'county']].rename(columns={'PARCEL_ID': 'PARCELID', 'county': 'county3'})
-
-        df = df.merge(juris_lookup, left_on='parcel_id', right_on='PARCELID', how='left').merge(county_lookup, on='PARCELID', how='left')
+        # this step is only needed if summary_output doesn't get a juris or county column;
+        # parcels is indexed by geo_id, so join those columns on the index
+        df = df.merge(orca.get_table("parcels").to_frame(['jurisdiction', 'county']), left_on='geo_id', right_index=True, how='left')
 
         # calculate jobs-housing fees for each county's acct
-        df_sub = df.loc[df.county3 == acct["county_name"]]
+        df_sub = df.loc[df.county == acct["county_name"]]
 
         print("Applying jobs-housing fees to %d commercial sqft" % df_sub.non_residential_sqft.sum())
 
@@ -486,7 +464,7 @@ def subsidized_office_developer(feasibility, coffer, formula, year, add_extra_columns_func, buildings, summary):
 
     # in order off the top
     feasibility = feasibility.sort_values(['max_profit_per_sqft'])
 
-    # make parcel_id available
+    # make geo_id available
     feasibility = feasibility.reset_index()
 
     print("%.0f subsidy with %d developments to choose from" % (total_subsidy, len(feasibility)))
 
             "non_residential_sqft": d["non_residential_sqft"],
             "juris": d["juris"],
             "tra_id": d["tra_id"],
-            "parcel_id": d["parcel_id"],
+            "geo_id": d["geo_id"],
             "index": dev_id
         }
 
diff --git a/baus/summaries.py b/baus/summaries.py
index c39f8e79f..a98b30cd6 100644
--- a/baus/summaries.py
+++ b/baus/summaries.py
@@ -216,12 +216,6 @@ def norm_and_round(s):
     write("Current share of units which are greenfield development:\n%s" %
           norm_and_round(df.residential_units.groupby(greenfield).sum()))
 
-    cmap = mapping["county_id_tm_map"]
-    jobs_by_county = jobs.zone_id.map(taz_geography.county).map(cmap).value_counts()
-    households_by_county = households.zone_id.map(taz_geography.county).map(cmap).value_counts()
-    jobs_by_housing = jobs_by_county / households_by_county.replace(0, 1)
-    write("Jobs/housing balance:\n" + str(jobs_by_housing))
-
     f.close()
 
 
@@ -488,18 +482,6 @@ def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year):
                       format(run_number, year))
     df.to_csv(uf_summary_csv)
 
-    # Summarize Logsums
-    if year in [2010, 2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]:
-        zones = orca.get_table('zones')
-        df = zones.to_frame(['zone_cml', 'zone_cnml', 'zone_combo_logsum'])
-        df.to_csv(os.path.join(orca.get_injectable("outputs_dir"),
-                               "run%d_taz_logsums_%d.csv"
-                               % (run_number, year)))
-        parcels = orca.get_table('parcels')
-        df = parcels.to_frame(['cml', 'cnml', 'combo_logsum'])
-        df.to_csv(os.path.join(orca.get_injectable("outputs_dir"),
-                               "run%d_parcel_logsums_%d.csv"
-                               % (run_number, year)))
 
 
 @orca.step()
 def 
building_summary(parcels, run_number, year, @@ -550,31 +532,31 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce join_col = 'zoningmodcat' if join_col in parcels_geography.to_frame().columns: - parcel_gg = parcels_geography.to_frame(["parcel_id", join_col, "juris"]) - df = df.merge(parcel_gg, on='parcel_id', how='left') + parcel_gg = parcels_geography.to_frame(["geo_id", join_col, "juris"]) + df = df.merge(parcel_gg, on='geo_id', how='left') - households_df = orca.merge_tables('households', [buildings, households], columns=['parcel_id', 'base_income_quartile']) + households_df = orca.merge_tables('households', [buildings, households], columns=['geo_id', 'base_income_quartile']) # add households by quartile on each parcel for i in range(1, 5): - df['hhq%d' % i] = households_df[households_df.base_income_quartile == i].parcel_id.value_counts() - df["tothh"] = households_df.groupby('parcel_id').size() + df['hhq%d' % i] = households_df[households_df.base_income_quartile == i].geo_id.value_counts() + df["tothh"] = households_df.groupby('geo_id').size() building_df = orca.merge_tables('buildings', [parcels, buildings], - columns=['parcel_id', 'residential_units', 'deed_restricted_units', + columns=['geo_id', 'residential_units', 'deed_restricted_units', 'preserved_units', 'inclusionary_units', 'subsidized_units']) - df['residential_units'] = building_df.groupby('parcel_id')['residential_units'].sum() - df['deed_restricted_units'] = building_df.groupby('parcel_id')['deed_restricted_units'].sum() - df['preserved_units'] = building_df.groupby('parcel_id')['preserved_units'].sum() - df['inclusionary_units'] = building_df.groupby('parcel_id')['inclusionary_units'].sum() - df['subsidized_units'] = building_df.groupby('parcel_id')['subsidized_units'].sum() + df['residential_units'] = building_df.groupby('geo_id')['residential_units'].sum() + df['deed_restricted_units'] = building_df.groupby('geo_id')['deed_restricted_units'].sum() + df['preserved_units'] = building_df.groupby('geo_id')['preserved_units'].sum() + df['inclusionary_units'] = building_df.groupby('geo_id')['inclusionary_units'].sum() + df['subsidized_units'] = building_df.groupby('geo_id')['subsidized_units'].sum() - jobs_df = orca.merge_tables('jobs', [buildings, jobs], columns=['parcel_id', 'empsix']) + jobs_df = orca.merge_tables('jobs', [buildings, jobs], columns=['geo_id', 'empsix']) # add jobs by empsix category on each parcel for cat in jobs_df.empsix.unique(): - df[cat] = jobs_df[jobs_df.empsix == cat].parcel_id.value_counts() - df["totemp"] = jobs_df.groupby('parcel_id').size() + df[cat] = jobs_df[jobs_df.empsix == cat].geo_id.value_counts() + df["totemp"] = jobs_df.groupby('geo_id').size() df.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % (run_number, year))) @@ -585,7 +567,7 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce # do diff with initial year df2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % - (run_number, initial_year)), index_col="parcel_id") + (run_number, initial_year)), index_col="geo_id") for col in df.columns: @@ -637,34 +619,25 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce format(run_number, geo_1 + geo_2))) @orca.step() -def travel_model_output(parcels, households, jobs, buildings, zones, maz, year, summary, final_year, - tm1_taz1_forecast_inputs, run_number, base_year_summary_taz, taz_geography, +def 
travel_model_output(parcels, households, jobs, buildings, year, summary, final_year,
+                       tm1_taz1_forecast_inputs, run_number, base_year_summary_taz, travel_model_zones,
                        tm1_tm2_maz_forecast_inputs, tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls):
 
-    if year not in [2010, 2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]:
-        # only summarize for years which are multiples of 5
-        return
-
-    parcels = parcels.to_frame()
-    parcels["zone_id_x"] = parcels.zone_id
-    orca.add_table('parcels', parcels)
-    parcels = orca.get_table("parcels")
+    # attach the tm1 taz and tm2 maz ids to parcels, then re-register the table
+    # so the orca.merge_tables calls below can pull taz_tm1 through buildings
+    parcels = parcels.to_frame().reset_index().merge(
+        travel_model_zones.to_frame(), on='geo_id', how='left').set_index('geo_id')
+    orca.add_table('parcels', parcels)
+    parcels = orca.get_table("parcels")
 
-    households_df = orca.merge_tables('households',
-                                      [parcels, buildings, households],
-                                      columns=['zone_id', 'zone_id_x',
-                                               'base_income_quartile',
-                                               'income', 'persons',
-                                               'maz_id'])
-    households_df["zone_id"] = households_df.zone_id_x
+    households_df = orca.merge_tables('households',
+                                      [parcels, buildings, households],
+                                      columns=['taz_tm1',
+                                               'base_income_quartile',
+                                               'income', 'persons',
+                                               'maz_id'])
 
-    taz_df = pd.DataFrame(index=zones.index)
+    # .unique() returns an ndarray, so wrap it in an index rather than asking for .index
+    taz_tm1_df = pd.DataFrame(index=pd.Index(travel_model_zones.taz_tm1.unique(), name='taz_tm1'))
 
-    taz_df["sd"] = taz_geography.superdistrict
-    taz_df["zone"] = zones.index
-    taz_df["county"] = taz_geography.county
-    taz_df["county_name"] = taz_geography.county_name
+    # taz_geography is no longer injected into this step, so fetch it for the
+    # superdistrict and county labels
+    taz_geog = orca.get_table("taz_geography")
+    taz_tm1_df["zone"] = taz_tm1_df.index
+    taz_tm1_df["sd"] = taz_geog.superdistrict
+    taz_tm1_df["county"] = taz_geog.county
 
-    jobs_df = orca.merge_tables(
-        'jobs',
-        [parcels, buildings, jobs],
-        columns=['zone_id', 'zone_id_x', 'empsix']
-    )
-
-    # totally baffled by this - after joining the three tables we have three
-    # zone_ids, one from the parcel table, one from buildings, and one from
-    # jobs and the one called zone_id has null values while there others do not
-    # going to change this while I think about this - turns out this has to do
-    # with major caching issue which has been reported upstream
-
-    # the null values are present in the jobs table, however when you merge the
-    # tables, the zone_id columns from the other tables don't have null values
-    # however on lumodel, these duplicate columns don't get created in the
-    # merge so a copy of zone_id (zone_id_x) is added to parcels to ensure
-    # it doesn't get dropped
-
-    # the same has now been repeated for households (as described with lumodel)
-    # no duplicate zone_ids emerged to use, so one was created from parcels
-    # a taz column existed, but was not consistently the same as zone_id
-
-    jobs_df["zone_id"] = jobs_df.zone_id_x
+    jobs_df = orca.merge_tables('jobs',
+                                [parcels, buildings, jobs],
+                                columns=['taz_tm1', 'empsix'])
 
     def getsectorcounts(sector):
-        return jobs_df.query("empsix == '%s'" % sector).\
-            groupby('zone_id').size()
+        return jobs_df.query("empsix == '%s'" % sector).groupby('taz_tm1').size()
 
     taz_tm1_df["agrempn"] = getsectorcounts("AGREMPN")
     taz_tm1_df["fpsempn"] = getsectorcounts("FPSEMPN")
     taz_tm1_df["herempn"] = getsectorcounts("HEREMPN")
     taz_tm1_df["retempn"] = getsectorcounts("RETEMPN")
     taz_tm1_df["mwtempn"] = getsectorcounts("MWTEMPN")
     taz_tm1_df["othempn"] = getsectorcounts("OTHEMPN")
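Building the TAZ summary frame from the crosswalk and filling it with groupby counts looks like this in isolation (toy data; note the explicit `pd.Index`, since the ndarray returned by `.unique()` has no `.index`):

```python
import pandas as pd

crosswalk = pd.DataFrame({"geo_id": [1, 2, 3], "taz_tm1": [900, 900, 901]})
jobs_df = pd.DataFrame({"taz_tm1": [900, 900, 901],
                        "empsix": ["RETEMPN", "AGREMPN", "RETEMPN"]})

taz = pd.DataFrame(index=pd.Index(crosswalk.taz_tm1.unique(), name="taz_tm1"))
taz["retempn"] = jobs_df.query("empsix == 'RETEMPN'").groupby("taz_tm1").size()
taz["totemp"] = jobs_df.groupby("taz_tm1").size()
```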
taz_tm1_df["totemp"] = jobs_df.groupby('taz_tm1').size() def gethhcounts(filter): - return households_df.query(filter).groupby('zone_id').size() + return households_df.query(filter).groupby('taz_tm1').size() - taz_df["hhincq1"] = gethhcounts("base_income_quartile == 1") - taz_df["hhincq2"] = gethhcounts("base_income_quartile == 2") - taz_df["hhincq3"] = gethhcounts("base_income_quartile == 3") - taz_df["hhincq4"] = gethhcounts("base_income_quartile == 4") - taz_df["hhpop"] = households_df.groupby('zone_id').persons.sum() - taz_df["tothh"] = households_df.groupby('zone_id').size() + taz_tm1_df["hhincq1"] = gethhcounts("base_income_quartile == 1") + taz_tm1_df["hhincq2"] = gethhcounts("base_income_quartile == 2") + taz_tm1_df["hhincq3"] = gethhcounts("base_income_quartile == 3") + taz_tm1_df["hhincq4"] = gethhcounts("base_income_quartile == 4") + taz_tm1_df["hhpop"] = households_df.groupby('taz_tm1').persons.sum() + taz_tm1_df["tothh"] = households_df.groupby('taz_tm1').size() zone_forecast_inputs = tm1_taz1_forecast_inputs.to_frame() zone_forecast_inputs.index = zone_forecast_inputs.zone_id - taz_df["shpop62p"] = zone_forecast_inputs.sh_62plus - taz_df["gqpop"] = zone_forecast_inputs["gqpop" + str(year)[-2:]].fillna(0) + taz_tm1_df["shpop62p"] = zone_forecast_inputs.sh_62plus + taz_tm1_df["gqpop"] = zone_forecast_inputs["gqpop" + str(year)[-2:]].fillna(0) - taz_df["totacre"] = zone_forecast_inputs.totacre_abag + taz_tm1_df["totacre"] = zone_forecast_inputs.totacre_abag # total population = group quarters plus households population - taz_df["totpop"] = (taz_df.hhpop + taz_df.gqpop).fillna(0) + taz_tm1_df["totpop"] = (taz_df.hhpop + taz_df.gqpop).fillna(0) - buildings_df = buildings.to_frame(['zone_id', + buildings_df = buildings.to_frame(['taz_tm1', 'building_type', 'residential_units', 'building_sqft', 'lot_size_per_unit']) - taz_df["res_units"] = buildings_df.\ - groupby('zone_id').residential_units.sum() - - taz_df["mfdu"] = buildings_df.\ - query("building_type == 'HM' or building_type == 'MR'").\ - groupby('zone_id').residential_units.sum() - - taz_df["sfdu"] = buildings_df.\ - query("building_type == 'HS' or building_type == 'HT'").\ - groupby('zone_id').residential_units.sum() + taz_tm1_df["res_units"] = buildings_df.groupby('zone_id').residential_units.sum() + taz_tm1_df["mfdu"] = buildings_df.query("building_type == 'HM' or building_type == 'MR'").\ + groupby('taz_tm1').residential_units.sum() + taz_tm1_df["sfdu"] = buildings_df.query("building_type == 'HS' or building_type == 'HT'").\ + groupby('taz_tm1').residential_units.sum() f = orca.get_injectable('parcel_first_building_type_is') @@ -745,99 +701,85 @@ def count_acres_with_mask(mask): mask *= parcels.acres return mask.groupby(parcels.zone_id).sum() - taz_df["resacre_unweighted"] = count_acres_with_mask( - f('residential') | f('mixedresidential')) + taz_tm1_df["resacre_unweighted"] = count_acres_with_mask(f('residential') | f('mixedresidential')) + taz_tm1_df["ciacre_unweighted"] = count_acres_with_mask(f('select_non_residential')) + taz_tm1_df["ciacre"] = scaled_ciacre(base_year_summary_taz.CIACRE_UNWEIGHTED, taz_tm1_df.ciacre_unweighted) + taz_tm1_df["resacre"] = scaled_resacre(base_year_summary_taz.RESACRE_UNWEIGHTED, taz_tm1_df.resacre_unweighted) - taz_df["ciacre_unweighted"] = count_acres_with_mask( - f('select_non_residential')) - - taz_df["ciacre"] = scaled_ciacre( - base_year_summary_taz.CIACRE_UNWEIGHTED, taz_df.ciacre_unweighted) - taz_df["resacre"] = scaled_resacre( - base_year_summary_taz.RESACRE_UNWEIGHTED, 
 
     rc = tm1_tm2_regional_controls.to_frame()
     taz_tm1_df = add_population(taz_tm1_df, year, rc)
-    taz_tm1_df.totpop = taz_df.hhpop + taz_df.gqpop
+    taz_tm1_df.totpop = taz_tm1_df.hhpop + taz_tm1_df.gqpop
     taz_tm1_df = add_employment(taz_tm1_df, year, rc)
     taz_tm1_df["density_pop"] = taz_tm1_df.totpop / taz_tm1_df.totacre
     taz_tm1_df["density_pop"] = taz_tm1_df["density_pop"].fillna(0)
-    taz_tm1_df["density_emp"] = (2.5 * taz_tm1_df.totemp) / taz_tm1_tm1_df.totacre
+    taz_tm1_df["density_emp"] = (2.5 * taz_tm1_df.totemp) / taz_tm1_df.totacre
     taz_tm1_df["density_emp"] = taz_tm1_df["density_emp"].fillna(0)
     taz_tm1_df["density"] = taz_tm1_df["density_pop"] + taz_tm1_df["density_emp"]
     taz_tm1_df["areatype"] = pd.cut(taz_tm1_df.density,
                                     bins=[0, 6, 30, 55, 100, 300, np.inf],
                                     labels=[5, 4, 3, 2, 1, 0])
     taz_tm1_df = add_age_categories(taz_tm1_df, year, rc)
     orca.add_table('taz_summary_1', taz_tm1_df)
 
     summary.add_zone_output(taz_tm1_df, "travel_model_output", year)
     summary.write_zone_output()
 
-    # otherwise it loses precision
-    if summary.parcel_output is not None\
-            and "geom_id" in summary.parcel_output:
-        summary.parcel_output["geom_id"] = \
-            summary.parcel_output.geom_id.astype('str')
-
     summary.write_parcel_output(add_xy={
         "xy_table": "parcels",
-        "foreign_key": "parcel_id",
+        "foreign_key": "geo_id",
         "x_col": "x",
         "y_col": "y"
     })
 
     # uppercase columns to match travel model template
-    taz_df.columns = \
-        [x.upper() for x in taz_df.columns]
+    taz_tm1_df.columns = [x.upper() for x in taz_tm1_df.columns]
 
-    maz = maz.to_frame(['TAZ', 'COUNTY', 'county_name', 'taz1454'])
+    # .unique() returns an ndarray, so wrap it in an index
+    maz_tm2_df = pd.DataFrame(index=pd.Index(travel_model_zones.maz_tm2.unique(), name='maz_tm2'))
+
     mazi = tm1_tm2_maz_forecast_inputs.to_frame()
     mazi_yr = str(year)[2:]
-    households_df.maz_id = households_df.maz_id.fillna(213906)
-    maz["hhpop"] = households_df.groupby('maz_id').persons.sum()
-    maz["tothh"] = households_df.groupby('maz_id').size()
-    tothh = taz_df.TOTHH.sum()
-    maz = add_households(maz, tothh)
+    maz_tm2_df["hhpop"] = households_df.groupby('maz_id').persons.sum()
+    maz_tm2_df["tothh"] = households_df.groupby('maz_id').size()
+
+    # columns were uppercased above, so the household total is TOTHH here
+    tothh = taz_tm1_df.TOTHH.sum()
+    maz_tm2_df = add_households(maz_tm2_df, tothh)
+    maz_tm2_df['gq_type_univ'] = mazi['gqpopu' + mazi_yr]
+    maz_tm2_df['gq_type_mil'] = mazi['gqpopm' + mazi_yr]
+    maz_tm2_df['gq_type_othnon'] = mazi['gqpopo' + mazi_yr]
+    maz_tm2_df['gq_tot_pop'] = maz_tm2_df['gq_type_univ'] + maz_tm2_df['gq_type_mil'] + maz_tm2_df['gq_type_othnon']
+    tot_gqpop = maz_tm2_df.gq_tot_pop.sum()
 
     rdf = tm1_tm2_regional_demographic_forecast.to_frame()
     tfi = tm1_taz1_forecast_inputs.to_frame()
     tfi.index = tfi.TAZ1454
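The household size, worker, and kids splits that follow are all total-times-base-year-share products (later rescaled to the regional controls by the `adjust_*` helpers). The core pattern in isolation, with invented shares:

```python
import pandas as pd

taz = pd.DataFrame({"TOTHH": [100, 200]}, index=pd.Index([900, 901], name="TAZ"))
shares = pd.DataFrame({"shrs1_2010": [0.3, 0.4], "shrs2_2010": [0.7, 0.6]},
                      index=pd.Index([900, 901], name="TAZ"))

# base-year shares applied to this year's totals, aligned on the TAZ index
taz["hh_size_1"] = taz.TOTHH * shares.shrs1_2010  # 30, 80
taz["hh_size_2"] = taz.TOTHH * shares.shrs2_2010  # 70, 120
```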
 
-    taz_df['gq_type_univ'] = maz.groupby('taz1454'
-                                         ).gq_type_univ.sum().fillna(0)
-    taz_df['gq_type_mil'] = maz.groupby('taz1454').gq_type_mil.sum().fillna(0)
-    taz_df['gq_type_othnon'] = maz.groupby('taz1454'
-                                           ).gq_type_othnon.sum().fillna(0)
-    taz_df['gq_tot_pop'] = maz.groupby('taz1454').gq_tot_pop.sum().fillna(0)
+    # map each maz to its taz so group quarters roll up to TAZ (assumes one
+    # taz_tm1 per maz_tm2 in the crosswalk)
+    maz_tm2_df['taz_tm1'] = travel_model_zones.to_frame().drop_duplicates('maz_tm2').set_index('maz_tm2').taz_tm1
+    taz_tm1_df['gq_type_univ'] = maz_tm2_df.groupby('taz_tm1').gq_type_univ.sum().fillna(0)
+    taz_tm1_df['gq_type_mil'] = maz_tm2_df.groupby('taz_tm1').gq_type_mil.sum().fillna(0)
+    taz_tm1_df['gq_type_othnon'] = maz_tm2_df.groupby('taz_tm1').gq_type_othnon.sum().fillna(0)
+    taz_tm1_df['gq_tot_pop'] = maz_tm2_df.groupby('taz_tm1').gq_tot_pop.sum().fillna(0)
 
-    taz_df['hh'] = taz_df.TOTHH
-    taz_df['hh_size_1'] = taz_df['TOTHH'] * tfi.shrs1_2010
-    taz_df['hh_size_2'] = taz_df['TOTHH'] * tfi.shrs2_2010
-    taz_df['hh_size_3'] = taz_df['TOTHH'] * tfi.shrs3_2010
-    taz_df['hh_size_4_plus'] = taz_df['TOTHH'] * tfi.shrs4_2010
+    taz_tm1_df['hh'] = taz_tm1_df.TOTHH
+    taz_tm1_df['hh_size_1'] = taz_tm1_df['TOTHH'] * tfi.shrs1_2010
+    taz_tm1_df['hh_size_2'] = taz_tm1_df['TOTHH'] * tfi.shrs2_2010
+    taz_tm1_df['hh_size_3'] = taz_tm1_df['TOTHH'] * tfi.shrs3_2010
+    taz_tm1_df['hh_size_4_plus'] = taz_tm1_df['TOTHH'] * tfi.shrs4_2010
 
-    taz_df['county'] = maz.groupby('taz1454').COUNTY.first()
-    taz_df['county_name'] = maz.groupby('taz1454').county_name.first()
+    # 'county' was uppercased to COUNTY above; restore a lowercase copy and map
+    # names from the tm1 county ids (assumed: the order used for county_df below)
+    taz_tm1_df['county'] = taz_tm1_df.COUNTY
+    county_names = {1: 'San Francisco', 2: 'San Mateo', 3: 'Santa Clara', 4: 'Alameda',
+                    5: 'Contra Costa', 6: 'Solano', 7: 'Napa', 8: 'Sonoma', 9: 'Marin'}
+    taz_tm1_df['county_name'] = taz_tm1_df.COUNTY.map(county_names)
 
-    taz_df['hh_wrks_0'] = taz_df['TOTHH'] * tfi.shrw0_2010
-    taz_df['hh_wrks_1'] = taz_df['TOTHH'] * tfi.shrw1_2010
-    taz_df['hh_wrks_2'] = taz_df['TOTHH'] * tfi.shrw2_2010
-    taz_df['hh_wrks_3_plus'] = taz_df['TOTHH'] * tfi.shrw3_2010
+    taz_tm1_df['hh_wrks_0'] = taz_tm1_df['TOTHH'] * tfi.shrw0_2010
+    taz_tm1_df['hh_wrks_1'] = taz_tm1_df['TOTHH'] * tfi.shrw1_2010
+    taz_tm1_df['hh_wrks_2'] = taz_tm1_df['TOTHH'] * tfi.shrw2_2010
+    taz_tm1_df['hh_wrks_3_plus'] = taz_tm1_df['TOTHH'] * tfi.shrw3_2010
 
-    taz_df['hh_kids_no'] = taz_df['TOTHH'] * tfi.shrn_2010
-    taz_df['hh_kids_yes'] = taz_df['TOTHH'] * tfi.shry_2010
-    taz_df = adjust_hhsize(taz_df, year, rdf, tothh)
-    taz_df = adjust_hhwkrs(taz_df, year, rdf, tothh)
-    taz_df = adjust_hhkids(taz_df, year, rdf, tothh)
-    del taz_df['hh']
+    taz_tm1_df['hh_kids_no'] = taz_tm1_df['TOTHH'] * tfi.shrn_2010
+    taz_tm1_df['hh_kids_yes'] = taz_tm1_df['TOTHH'] * tfi.shry_2010
+    taz_tm1_df = adjust_hhsize(taz_tm1_df, year, rdf, tothh)
+    taz_tm1_df = adjust_hhwkrs(taz_tm1_df, year, rdf, tothh)
+    taz_tm1_df = adjust_hhkids(taz_tm1_df, year, rdf, tothh)
+    del taz_tm1_df['hh']
 
-    taz_df.index.name = 'TAZ'
+    taz_tm1_df.index.name = 'TAZ'
 
@@ -846,16 +788,8 @@ def count_acres_with_mask(mask):
 
     # aggregate TAZ summaries to create county summaries
 
-    county_df = pd.DataFrame(index=['San Francisco',
-                                    'San Mateo',
-                                    'Santa Clara',
-                                    'Alameda',
-                                    'Contra Costa',
-                                    'Solano',
-                                    'Napa',
-                                    'Sonoma',
-                                    'Marin'])
-
+    county_df = pd.DataFrame(index=['San Francisco', 'San Mateo', 'Santa Clara', 'Alameda',
+                                    'Contra Costa', 'Solano', 'Napa', 'Sonoma', 'Marin'])
     county_df["COUNTY_NAME"] = county_df.index
 
     taz_cols = ["AGREMPN", "FPSEMPN", "HEREMPN", "RETEMPN", "MWTEMPN",
 
     for col in taz_cols:
-        taz_df_grouped = taz_df.groupby('county_name').sum()
+        taz_df_grouped = taz_tm1_df.groupby('county_name').sum()
         county_df[col] = taz_df_grouped[col]
 
-    county_df["DENSITY"] = \
-        (county_df.TOTPOP + (2.5 * county_df.TOTEMP)) / county_df.TOTACRE
+    county_df["DENSITY"] = (county_df.TOTPOP + (2.5 * county_df.TOTEMP)) / county_df.TOTACRE
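The AREATYPE assignment that follows bins the density measure into the travel model's six area types (0 is the most urban). The same `pd.cut` call on toy numbers:

```python
import numpy as np
import pandas as pd

density = pd.Series([3.0, 50.0, 400.0])
areatype = pd.cut(density,
                  bins=[0, 6, 30, 55, 100, 300, np.inf],
                  labels=[5, 4, 3, 2, 1, 0])
# -> 5 (rural), 3, 0 (regional core)
```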
county_df["AREATYPE"] = pd.cut( county_df.DENSITY, bins=[0, 6, 30, 55, 100, 300, np.inf], - labels=[5, 4, 3, 2, 1, 0] - ) + labels=[5, 4, 3, 2, 1, 0]) - base_year_summary_taz = \ - base_year_summary_taz.to_frame() - base_year_summary_county = \ - base_year_summary_taz.groupby('COUNTY_NAME').sum() - base_year_summary_county_ciacre = \ - base_year_summary_county['CIACRE_UNWEIGHTED'] - base_year_summary_county_resacre = \ - base_year_summary_county['RESACRE_UNWEIGHTED'] + base_year_summary_taz = base_year_summary_taz.to_frame() + base_year_summary_county = base_year_summary_taz.groupby('COUNTY_NAME').sum() + base_year_summary_county_ciacre = base_year_summary_county['CIACRE_UNWEIGHTED'] + base_year_summary_county_resacre = base_year_summary_county['RESACRE_UNWEIGHTED'] - county_df["CIACRE"] = scaled_ciacre( - base_year_summary_county_ciacre, county_df.CIACRE_UNWEIGHTED) - county_df["RESACRE"] = scaled_resacre( - base_year_summary_county_resacre, county_df.RESACRE_UNWEIGHTED) + county_df["CIACRE"] = scaled_ciacre(base_year_summary_county_ciacre, county_df.CIACRE_UNWEIGHTED) + county_df["RESACRE"] = scaled_resacre(base_year_summary_county_resacre, county_df.RESACRE_UNWEIGHTED) county_df = county_df[["COUNTY_NAME", "AGREMPN", "FPSEMPN", "HEREMPN", "RETEMPN", "MWTEMPN", "OTHEMPN", "TOTEMP", @@ -1599,10 +1525,10 @@ def write(s): # print out demolished buildings eq_demolish = eq_demolish.to_frame() eq_demolish_taz = misc.reindex(parcels.zone_id, - eq_demolish.parcel_id) + eq_demolish.geo_id) eq_demolish['taz'] = eq_demolish_taz eq_demolish['count'] = 1 - eq_demolish = eq_demolish.drop(['parcel_id', 'year_built', + eq_demolish = eq_demolish.drop(['geo_id', 'year_built', 'redfin_sale_year'], axis=1) eq_demolish = eq_demolish.groupby(['taz']).sum() eq_demolish.to_csv(os.path.join(orca.get_injectable("outputs_dir"), @@ -1614,7 +1540,7 @@ def write(s): retrofit_bldgs_tot = orca.get_table("retrofit_bldgs_tot") retrofit_bldgs_tot = retrofit_bldgs_tot.to_frame() retrofit_bldgs_tot_taz = misc.reindex(parcels.zone_id, - retrofit_bldgs_tot.parcel_id) + retrofit_bldgs_tot.geoid) retrofit_bldgs_tot['taz'] = retrofit_bldgs_tot_taz retrofit_bldgs_tot['count'] = 1 retrofit_bldgs_tot = retrofit_bldgs_tot[[ @@ -1633,7 +1559,7 @@ def write(s): if year in [2030, 2035, 2050] and eq: buildings = buildings.to_frame() buildings_taz = misc.reindex(parcels.zone_id, - buildings.parcel_id) + buildings.geo_id) buildings['taz'] = buildings_taz buildings['count'] = 1 buildings = buildings[['taz', 'count', 'residential_units', diff --git a/baus/utils.py b/baus/utils.py index 5aa7872bf..86801f0a2 100644 --- a/baus/utils.py +++ b/baus/utils.py @@ -93,24 +93,6 @@ def nearest_neighbor(df1, df2): return df1.index.values[indexes] -# need to reindex from geom id to the id used on parcels -def geom_id_to_parcel_id(df, parcels): - s = parcels.geom_id # get geom_id - s = pd.Series(s.index, index=s.values) # invert series - df["new_index"] = s.loc[df.index] # get right parcel_id for each geom_id - df = df.dropna(subset=["new_index"]) - df["new_index"] = df.new_index.astype('int') - df = df.set_index("new_index", drop=True) - df.index.name = "parcel_id" - return df - - -def parcel_id_to_geom_id(s): - parcels = orca.get_table("parcels") - g = parcels.geom_id # get geom_id - return pd.Series(g.loc[s.values].values, index=s.index) - - # This is best described by example. Imagine s is a series where the # index is parcel ids and the values are cities, while counts is a # series where the index is cities and the values are counts. 
You diff --git a/baus/variables.py b/baus/variables.py index d0ecb3e5c..bcc128153 100644 --- a/baus/variables.py +++ b/baus/variables.py @@ -83,8 +83,8 @@ def naics(jobs): @orca.column('jobs', cache=True) -def empsix_id(jobs, mapping): - return jobs.empsix.map(mapping['empsix_name_to_id']) +def empsix_id(jobs): + return jobs.empsix ############################# @@ -182,7 +182,7 @@ def price_per_sqft(buildings): @orca.column('buildings', cache=True) def transit_type(buildings, parcels_geography): - return misc.reindex(parcels_geography.tpp_id, buildings.parcel_id).\ + return misc.reindex(parcels_geography.tpp_id, buildings.geo_id).\ reindex(buildings.index).fillna('none') @@ -193,17 +193,17 @@ def unit_price(buildings): @orca.column('buildings', cache=True) def tmnode_id(buildings, parcels): - return misc.reindex(parcels.tmnode_id, buildings.parcel_id) + return misc.reindex(parcels.tmnode_id, buildings.geo_id) @orca.column('buildings') def juris_ave_income(parcels, buildings): - return misc.reindex(parcels.juris_ave_income, buildings.parcel_id) + return misc.reindex(parcels.juris_ave_income, buildings.geo_id) @orca.column('buildings', cache=True) def is_sanfran(parcels, buildings): - return misc.reindex(parcels.is_sanfran, buildings.parcel_id) + return misc.reindex(parcels.is_sanfran, buildings.geo_id) @orca.column('buildings', cache=True) @@ -300,17 +300,17 @@ def residential_price(buildings, residential_units, developer_settings): @orca.column('buildings', cache=True, cache_scope='iteration') def cml(buildings, parcels): - return misc.reindex(parcels.cml, buildings.parcel_id) + return misc.reindex(parcels.cml, buildings.geo_id) @orca.column('buildings', cache=True, cache_scope='iteration') def cnml(buildings, parcels): - return misc.reindex(parcels.cnml, buildings.parcel_id) + return misc.reindex(parcels.cnml, buildings.geo_id) @orca.column('buildings', cache=True, cache_scope='iteration') def combo_logsum(buildings, parcels): - return misc.reindex(parcels.combo_logsum, buildings.parcel_id) + return misc.reindex(parcels.combo_logsum, buildings.geo_id) ##################### @@ -331,8 +331,8 @@ def retail_ratio(nodes): ##################### @orca.column('parcels') -def maz_id(parcels, parcel_to_maz): - return parcel_to_maz.maz.reindex(parcels.index) +def maz_id(parcels, travel_model_zones): + return travel_model_zones.maz_tm2.reindex(parcels.index) @orca.column("parcels") @@ -354,7 +354,7 @@ def retail_ratio(parcels, nodes): # attribute on the buildings @orca.column('parcels', cache=True) def stories(buildings): - return buildings.stories.groupby(buildings.parcel_id).max() + return buildings.stories.groupby(buildings.geo_id).max() @orca.column('parcels', cache=True) @@ -593,9 +593,9 @@ def parcel_is_allowed(form): @orca.column('parcels') -def first_building_type(buildings, parcels): - df = buildings.to_frame(columns=['building_type', 'parcel_id']) - return df.groupby('parcel_id').building_type.first() +def first_building_type(buildings): + df = buildings.to_frame(columns=['building_type', 'geo_id']) + return df.groupby('geo_id').building_type.first() @orca.injectable(autocall=False) @@ -623,25 +623,10 @@ def juris_ave_income(households, buildings, parcels_geography, parcels): # missing values with 1800 - for use with development limits @orca.column('parcels') def newest_building(parcels, buildings): - return buildings.year_built.groupby(buildings.parcel_id).max().\ + return buildings.year_built.groupby(buildings.geo_id).max().\ reindex(parcels.index).fillna(1800) -# this returns the set of 
parcels which have been marked as -# disapproved by "the button" - only equals true when disallowed -@orca.column('parcels', cache=True) -def manual_nodev(parcel_rejections, parcels): - df1 = parcels.to_frame(['x', 'y']).dropna(subset=['x', 'y']) - df2 = parcel_rejections.to_frame(['lng', 'lat']) - df2 = df2[parcel_rejections.state == "denied"] - df2 = df2[["lng", "lat"]] # need to change the order - ind = nearest_neighbor(df1, df2) - - s = pd.Series(False, parcels.index) - s.loc[ind.flatten()] = True - return s.astype('int') - - @orca.column('parcels') def oldest_building_age(parcels, year): return year - parcels.oldest_building.replace(9999, 0) @@ -655,25 +640,26 @@ def is_sanfran(parcels_geography, buildings, parcels): @orca.column('parcels', cache=True) def total_non_residential_sqft(parcels, buildings): - return buildings.non_residential_sqft.groupby(buildings.parcel_id).sum().\ + return buildings.non_residential_sqft.groupby(buildings.geo_id).sum().\ reindex(parcels.index).fillna(0) +# these are parcels where development is off-limits @orca.column('parcels') -def nodev(zoning_existing, parcels, static_parcels): - # nodev from zoning - s1 = zoning_existing.nodev.reindex(parcels.index).\ - fillna(0).astype('bool') - # nodev from static parcels - this marks nodev those parcels which are - # marked as "static" - any parcels which should not be considered by the - # developer model may be marked as static - s2 = parcels.index.isin(static_parcels) - # nodev from sea level rise- determined by hazards.py model - if 'slr_nodev' in parcels.columns: - s3 = np.array(parcels['slr_nodev']) - return s1 | s2 | s3 - else: - return s1 | s2 +def nodev(parcels, nodev_sites): + # the table tells us what category of nodev the various entries are: + # manual, sea level rise, preservation area, etc. + nd = nodev_sites[nodev_sites["no_dev"] == 1].index + nd.append(static_parcels.index) + return nd.reindex(parcels.index) + + +# these are parcels where households and jobs don't move +@orca.injectable() +def static_parcels(institutions): + static_parcels = institutions.index.values + # development projects sites then get added to this in the year they are added + return static_parcels # get built far but set to nan for small parcels @@ -741,7 +727,7 @@ def max_dua(parcels_zoning_calculations, parcels, zoning_adjusters): @orca.column('parcels') def general_type(parcels, buildings): - s = buildings.general_type.groupby(buildings.parcel_id).first() + s = buildings.general_type.groupby(buildings.geo_id).first() return s.reindex(parcels.index).fillna("Vacant") @@ -798,8 +784,8 @@ def land_cost(parcels): @orca.column('parcels', cache=True) -def county(parcels, mapping): - return parcels.county_id.map(mapping["county_id_map"]) +def county(parcels): + return parcels.county @orca.column('parcels', cache=True) @@ -841,7 +827,7 @@ def vmt_code(parcels, run_setup): @orca.column('parcels', cache=True) -def subzone(parcels, parcels_subzone): +def subzone(parcels_subzone): return parcels_subzone.taz_sub diff --git a/configs/developer/developer_settings.yaml b/configs/developer/developer_settings.yaml index eeb13fb9b..a222412a8 100644 --- a/configs/developer/developer_settings.yaml +++ b/configs/developer/developer_settings.yaml @@ -5,17 +5,14 @@ cap_rate: 0.04 # equal weights. 
0.5 means profit is half as much as ROC profit_vs_return_on_cost_combination_factor: 0.5 - -# settings for the feasibility model - parcel_filter is very important and is a rule of -# which parcels to consider - we don't consider nodev properties, historic properties, and -# optionally we don't look at small lot single family -# pass_through are columns not used by the developer but which go in the debug output +# settings for the feasibility model - feasibility: - parcel_filter: (nodev != 1 and manual_nodev != 1 and sdem != 1 and oldest_building > 1906 + parcel_filter: (nodev != 1 and sdem != 1 and oldest_building > 1906 and oldest_building_age > 20 and (total_residential_units != 1 or parcel_acres > 1.0) and first_building_type != 'HO' and first_building_type != 'SC') residential_to_yearly: True simple_zoning: True + # pass_through are columns not used by the developer but which go in the debug output pass_through: - oldest_building - total_sqft @@ -42,31 +39,6 @@ feasibility: - vmt_res_cat - vmt_nonres_cat -# a list of parcel geom ids which urbansim doesn't touch - this are viewed as exceptions -# and are often dealt with using specific models (SDEM and possibly proportional jobs model) -# which ignore this list -static_parcels: - - 11280465768398 # city hall - - 2240580234395 # sf state - - 7299494955245 # sj state - - 13202883289710 # sj state - - 6193503633797 # sj state - - 8603860488630 # sj state - - 8173572322083 # ucb - - 14259504015679 # labs - - 3473860030354 # sfo - - 11976417381057 # sf general - - 5600199824880 # more sf gen - - 9143399820283 - - 2161358104676 - - 15636310238820 - - 14512305680993 # sonoma state - - 15424804982410 # cal state east bay - - 7414256675266 # cal state east bay - - 8817315949318 # san quentin - - 12668875358422 # san quentin - - # settings that get passed to the residential developer as kwargs residential_developer: target_vacancy: .03 @@ -99,4 +71,74 @@ building_sqft_per_job: RB: 445 MR: 383 MT: 383 - ME: 383 \ No newline at end of file + ME: 383 + + +# this maps building type ids to general building types +# basically just reduces dimensionality +building_type_map: + HS: Residential + HT: Residential + HM: Residential + OF: Office + HO: Hotel + SC: School + IL: Industrial + IW: Industrial + IH: Industrial + RS: Retail + RB: Retail + MR: Residential + MT: Retail + ME: Office + PA: Parking + PA2: Parking + + +# this maps building "forms" from the developer model +# to building types so that when the developer builds a +# "form" this can be converted for storing as a type +# in the building table - in the long run, the developer +# forms and the building types should be the same and the +# developer model should account for the differences. +# travel_model_ variables are for the travel_model_summary step. 
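+# illustrative example (hypothetical parcel): when the developer builds the
+# "residential" form, the resulting building is stored with one of the types
+# listed under residential below (HS, HT or HM), and building_type_map above
+# then reduces that stored type back to the general type "Residential"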
+form_to_btype: + residential: + - HS + - HT + - HM + industrial: + - IL + - IW + - IH + retail: + - RS + - RB + office: + - OF + mixedresidential: + - MR + mixedoffice: + - ME + select_non_residential: + - OF + - HO + - SC + - IL + - IW + - IH + - RS + - RB + - MR + + +# convert square meters to square feet +parcel_size_factor: 10.764 + + +# these are the tables the get auto-merged to buildings/parcels in the hedonic and lcms +aggregation_tables: + - nodes + - tmnodes + - logsums + - buildings \ No newline at end of file diff --git a/configs/mapping.yaml b/configs/mapping.yaml deleted file mode 100644 index a160168b1..000000000 --- a/configs/mapping.yaml +++ /dev/null @@ -1,143 +0,0 @@ -# this maps Synthicity's 25 employment categories to -# the six major employment categories traditionally -# used by MTC and ABAG for use in output to the Travel Model -naics_to_empsix: - 10: AGREMPN - 11: AGREMPN - 21: AGREMPN - 22: MWTEMPN - 23: OTHEMPN - 31: MWTEMPN - 3133: MWTEMPN - 32: MWTEMPN - 33: MWTEMPN - 42: MWTEMPN - 44: RETEMPN - 4445: RETEMPN - 45: RETEMPN - 48: MWTEMPN - 4849: MWTEMPN - 49: MWTEMPN - 51: OTHEMPN - 52: FPSEMPN - 53: FPSEMPN - 54: FPSEMPN - 55: FPSEMPN - 56: FPSEMPN - 61: HEREMPN - 62: HEREMPN - 71: HEREMPN - 72: HEREMPN - 81: OTHEMPN - 91: OTHEMPN - 92: OTHEMPN - 99: OTHEMPN - 3133: MWTEMPN - 4445: RETEMPN - 4849: MWTEMPN - - -# what it says -empsix_name_to_id: - AGREMPN: 1 - MWTEMPN: 2 - RETEMPN: 3 - FPSEMPN: 4 - HEREMPN: 5 - OTHEMPN: 6 - - -# this maps building type ids to general building types -# basically just reduces dimensionality -building_type_map: - HS: Residential - HT: Residential - HM: Residential - OF: Office - HO: Hotel - SC: School - IL: Industrial - IW: Industrial - IH: Industrial - RS: Retail - RB: Retail - MR: Residential - MT: Retail - ME: Office - PA: Parking - PA2: Parking - - -# this maps building "forms" from the developer model -# to building types so that when the developer builds a -# "form" this can be converted for storing as a type -# in the building table - in the long run, the developer -# forms and the building types should be the same and the -# developer model should account for the differences. -# travel_model_ variables are for the travel_model_summary step. 
-form_to_btype:
-  residential:
-  - HS
-  - HT
-  - HM
-  industrial:
-  - IL
-  - IW
-  - IH
-  retail:
-  - RS
-  - RB
-  office:
-  - OF
-  mixedresidential:
-  - MR
-  mixedoffice:
-  - ME
-  select_non_residential:
-  - OF
-  - HO
-  - SC
-  - IL
-  - IW
-  - IH
-  - RS
-  - RB
-  - MR
-
-
-# county ids in parcels geography (I think FIPS codes)
-county_id_map:
-  85: Santa Clara
-  1: Alameda
-  13: Contra Costa
-  81: San Mateo
-  97: Sonoma
-  75: San Francisco
-  95: Solano
-  41: Marin
-  55: Napa
-
-
-# county ids for the travel model
-county_id_tm_map:
-  3: Santa Clara
-  4: Alameda
-  5: Contra Costa
-  2: San Mateo
-  8: Sonoma
-  1: San Francisco
-  6: Solano
-  9: Marin
-  7: Napa
-
-
-# convert square meters to square feet
-parcel_size_factor: 10.764
-
-
-# these are the tables the get auto-merged to buildings/parcels in the hedonic and lcms
-aggregation_tables:
-  - nodes
-  - tmnodes
-  - logsums
-  - buildings
\ No newline at end of file
diff --git a/configs/paths.yaml b/configs/paths.yaml
deleted file mode 100644
index 81696943e..000000000
--- a/configs/paths.yaml
+++ /dev/null
@@ -1,13 +0,0 @@
-# location of the hdf store
-store: basis_inputs/parcels_buildings_agents/2015_09_01_bayarea_v3.h5
-
-# large baus files are stored in s3 - this key gives the settings for fetching them
-s3_settings:
-  bucket: bayarea_urbansim
-  files:
-    - 2015_09_01_bayarea_v3.h5
-    - 2015_06_01_osm_bayarea4326.h5
-    - 2015_12_21_zoning_parcels.csv
-    - 02_01_2016_parcels_geography.csv
-    - 2015_08_29_costar.csv
-    - 2015_08_03_tmnet.h5
\ No newline at end of file

From 158a1c766e6494ee56ccc9dc903101cf6d6cdf1a Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Wed, 10 May 2023 09:05:27 -0700
Subject: [PATCH 02/49] add separated dev projects strategy projects input

---
 baus/datasources.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index 3d687b282..3ebe863b3 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -485,7 +485,7 @@ def manual_edits():
 # shared between demolish and build tables below
 def get_dev_projects_table():
     df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"),
-                     "basis_inputs/parcels_buildings_agents/dev_pipeline_v0.csv"))
+                     "basis_inputs/parcels_buildings_agents/dev_pipeline_v0b.csv"))
     df = df.set_index("geo_id")
     return df
 
@@ -513,6 +513,21 @@ def development_projects():
     return df
 
 
+@orca.table(cache=True)
+def dev_pipeline_strategy_projects(run_setup, development_projects):
+
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0b.csv"))
+    df = df.set_index("geo_id")
+
+    if run_setup["dev_pipeline_strategy_projects"]:
+        dp = development_projects.to_frame()
+        # should error if the columns don't match the dev pipeline columns
+        df = dp.append(df)
+        # should all be add/build
+        df = df[df.action.isin(["add", "build"])]
+
+    return df
+
 
 @orca.table(cache=True)
 def jobs():

From 27f15535d9999dc4113400d86b932e1ad538aa93 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Wed, 10 May 2023 09:13:20 -0700
Subject: [PATCH 03/49] update buildings table with v0b

---
 baus/datasources.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index 3ebe863b3..09e9cd030 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -541,7 +541,7 @@ def households():
 
 @orca.table(cache=True)
 def buildings():
-    return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/buildings.csv")
+
return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/buildings_v0b.csv") @orca.table(cache=True) From 72c11ab3cfba2c6fe9b53f8d73ad5ba77c423770 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 10 May 2023 10:26:39 -0700 Subject: [PATCH 04/49] update growth_geographies to v0b --- baus/datasources.py | 2 +- baus/variables.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/baus/datasources.py b/baus/datasources.py index 09e9cd030..012cc45ba 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -413,7 +413,7 @@ def parcels_zoning_calculations(parcels): @orca.table(cache=True) def growth_geographies(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0b.csv") @orca.table(cache=True) diff --git a/baus/variables.py b/baus/variables.py index bcc128153..d5ca30f5b 100644 --- a/baus/variables.py +++ b/baus/variables.py @@ -181,8 +181,8 @@ def price_per_sqft(buildings): @orca.column('buildings', cache=True) -def transit_type(buildings, parcels_geography): - return misc.reindex(parcels_geography.tpp_id, buildings.geo_id).\ +def transit_type(buildings, growth_geographies): + return misc.reindex(growth_geographies.tpp_id, buildings.geo_id).\ reindex(buildings.index).fillna('none') From 8043276d5fb1f3547ec0d2efd6efe6ecef7a0f71 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 15 Jun 2023 10:00:47 -0700 Subject: [PATCH 05/49] update travel_model_zones to v0b --- baus.py | 3 ++- baus/datasources.py | 20 +------------------- baus/summaries.py | 16 ++++++++-------- baus/variables.py | 13 +++++++------ 4 files changed, 18 insertions(+), 34 deletions(-) diff --git a/baus.py b/baus.py index ad33c9066..e84f466f8 100644 --- a/baus.py +++ b/baus.py @@ -519,7 +519,8 @@ def run_models(MODE): df2 = pd.read_csv((orca.get_injectable("outputs_dir")+"/run%d_superdistrict_summaries_2050.csv") % run_num) df2 = df2.set_index(df2.columns[0]).sort_index() - supnames = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/superdistricts_geography.csv"), index_col="number").name + supnames_df = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/travel_model_zones_v0b.csv")) + supnames = supnames_df.sort_values(['superdistrict'])['superdistrict'].unique() summary = compare_summary(df1, df2, supnames) with open((orca.get_injectable("outputs_dir") + "/run%d_difference_report.log") % run_num, "w") as f: diff --git a/baus/datasources.py b/baus/datasources.py index 012cc45ba..d9d32a1f5 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -312,7 +312,7 @@ def new_tpp_id(): @orca.table(cache=True) def travel_model_zones(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0.csv")) + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0b.csv")) @orca.table(cache=True) @@ -616,11 +616,6 @@ def vmt_fee_categories(): index_col="taz") -@orca.table(cache=True) -def superdistricts_geography(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/superdistricts_geography.csv"), index_col="number") - - @orca.table(cache=True) def sqft_per_job_adjusters(): return pd.read_csv(os.path.join(misc.configs_dir(), "adjusters/sqft_per_job_adjusters.csv"), index_col="number") @@ 
-631,19 +626,6 @@ def telecommute_sqft_per_job_adjusters():
     return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/telecommute_sqft_per_job_adjusters.csv"), index_col="number")
 
 
-@orca.table(cache=True)
-def taz_geography(superdistricts_geography, mapping):
-    tg = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/taz_geography.csv"),
-                     dtype={'zone': np.int64, 'superdistrcit': np.int64, 'county': np.int64}, index_col="zone")
-    # we want "subregion" geography on the taz_geography table
-    # we have to go get it from the superdistricts_geography table and join
-    # using the superdistrcit id
-    tg["subregion_id"] = superdistricts_geography.subregion.loc[tg.superdistrict].values
-    tg["subregion"] = tg.subregion_id.map({1: "Core", 2: "Urban", 3: "Suburban", 4: "Rural"})
-
-    return tg
-
-
 # SLR progression by year
 @orca.table(cache=True)
 def slr_progression():
diff --git a/baus/summaries.py b/baus/summaries.py
index a98b30cd6..46d1ee1e0 100644
--- a/baus/summaries.py
+++ b/baus/summaries.py
@@ -42,10 +42,10 @@ def write(s):
 
 @orca.step()
-def topsheet(households, jobs, buildings, parcels, zones, year, run_number, taz_geography, parcels_zoning_calculations,
-             summary, parcels_geography, new_tpp_id, residential_units, mapping):
+def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parcels_zoning_calculations,
+             summary, parcels_geography, new_tpp_id, residential_units, mapping, travel_model_zones):
 
-    hh_by_subregion = misc.reindex(taz_geography.subregion, households.zone_id).value_counts()
+    hh_by_subregion = misc.reindex(travel_model_zones.subregion, households.geo_id).value_counts()
 
     # Cols for Draft/Final Blueprint and EIR geographies
     households_df = orca.merge_tables('households', [parcels_geography, buildings, households],
@@ -66,7 +66,7 @@ def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parc
     # round to nearest 100s
     hhincome_by_insesit = (hhincome_by_insesit/100).round()*100
 
-    jobs_by_subregion = misc.reindex(taz_geography.subregion, jobs.zone_id).value_counts()
+    jobs_by_subregion = misc.reindex(travel_model_zones.subregion, jobs.geo_id).value_counts()
 
     jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs], columns=['pda_id', 'tra_id'])
 
@@ -633,11 +633,11 @@ def travel_model_output(parcels, households, jobs, buildings, year, summary, fin
                                   'income', 'persons', 'maz_id'])
 
-    taz_tm1_df = pd.DataFrame(index=travel_model_zones.taz_tm1.unique().index)
+    taz_tm1_df = pd.DataFrame(index=travel_model_zones.sort_values(['taz_tm1'])['taz_tm1'].unique())
 
-    taz_tm1_df["zone"] = zones.index
-    taz_tm1_df["sd"] = taz_geography.superdistrict
-    taz_tm1_df["county"] = taz_geography.county
+    taz_tm1_df["zone"] = taz_tm1_df.index
+    taz_tm1_df["sd"] = parcels.sort_values(['taz_tm1']).groupby(['taz_tm1'])['superdistrict'].first()
+    taz_tm1_df["county"] = parcels.sort_values(['taz_tm1']).groupby(['taz_tm1'])['county'].first()
 
     jobs_df = orca.merge_tables(
         'jobs',
diff --git a/baus/variables.py b/baus/variables.py
index d5ca30f5b..eb42e8430 100644
--- a/baus/variables.py
+++ b/baus/variables.py
@@ -147,11 +147,12 @@ def vacant_res_units(buildings, households):
 
 @orca.column('buildings', cache=True)
-def sqft_per_job(buildings, building_sqft_per_job, sqft_per_job_adjusters, telecommute_sqft_per_job_adjusters, taz_geography, base_year, year, run_setup):
+def sqft_per_job(buildings, building_sqft_per_job, sqft_per_job_adjusters, telecommute_sqft_per_job_adjusters, travel_model_zones,
+                 base_year, year,
run_setup): sqft_per_job = buildings.building_type.fillna("O").map(building_sqft_per_job) - superdistrict = misc.reindex(taz_geography.superdistrict, buildings.zone_id) + superdistrict = misc.reindex(travel_model_zones.superdistrict, buildings.geo_id) # this factor changes all sqft per job according to which superdistrict the building is in - this is so denser areas can have lower sqft per job # this is a simple multiply so a number 1.1 increases the sqft per job by 10% and .9 decreases it by 10% @@ -470,8 +471,8 @@ def juris_coc(parcels, parcels_geography): @orca.column('parcels', cache=True) -def superdistrict(parcels, taz_geography): - return misc.reindex(taz_geography.superdistrict, parcels.zone_id) +def superdistrict(parcels, travel_model_zones): + return misc.reindex(travel_model_zones.superdistrict, parcels.geo_id) # perffoot is a dummy indicating the FOOTprint for the PERFormance targets @@ -815,8 +816,8 @@ def tmnode_id(parcels, net): @orca.column('parcels', cache=True) -def subregion(taz_geography, parcels): - return misc.reindex(taz_geography.subregion, parcels.zone_id) +def subregion(travel_model_zones, parcels): + return misc.reindex(travel_model_zones.subregion, parcels.geo_id) @orca.column('parcels', cache=True) From a69267c9b2a593902f6f4b457fa558cf74aa8783 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 27 Jun 2023 11:17:48 -0700 Subject: [PATCH 06/49] update boc to v0c --- baus/datasources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus/datasources.py b/baus/datasources.py index d9d32a1f5..919672d3b 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -289,7 +289,7 @@ def costar(parcels): @orca.table(cache=True) def zoning_existing(zoning_lookup): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/boc_v0.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/boc_v0c.csv") @orca.table(cache=True) From d7b065a0dff19c6769cb3c9d30dd8e87bf4bd34f Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 27 Jun 2023 12:00:09 -0700 Subject: [PATCH 07/49] update buildings table to v0c --- baus/datasources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus/datasources.py b/baus/datasources.py index 919672d3b..78e455ce5 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -541,7 +541,7 @@ def households(): @orca.table(cache=True) def buildings(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/buildings_v0b.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/buildings_v0c.csv") @orca.table(cache=True) From 72e42c1ae526cd85c75aeec15d97db3ad7c034ce Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 27 Jun 2023 12:38:13 -0700 Subject: [PATCH 08/49] update dev pipeline tables to v0c --- baus/datasources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baus/datasources.py b/baus/datasources.py index 78e455ce5..0fceb81ef 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -485,7 +485,7 @@ def manual_edits(): # shared between demolish and build tables below def get_dev_projects_table(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/dev_pipeline_v0b.csv")) + "basis_inputs/parcels_buildings_agents/dev_pipeline_v0c.csv")) df = df.set_index("geo_id") return df @@ -516,7 +516,7 @@ def development_projects(): @orca.table(cache=True) def 
dev_pipeline_strategy_projects(run_setup, development_projects): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0b.csv")) + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0c.csv")) df = df.set_index("geo_id") if run_setup["dev_pipeline_strategy_projects"]: From d704faf4e7ada1e31a92c4f30477095c96ad9ec8 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 27 Jun 2023 12:57:52 -0700 Subject: [PATCH 09/49] move unique parcel identifier back to parcel_id --- baus/datasources.py | 16 +++++++-------- baus/models.py | 46 +++++++++++++++++++++--------------------- baus/postprocessing.py | 12 +++++------ baus/subsidies.py | 12 +++++------ baus/summaries.py | 44 ++++++++++++++++++++-------------------- baus/variables.py | 32 ++++++++++++++--------------- 6 files changed, 81 insertions(+), 81 deletions(-) diff --git a/baus/datasources.py b/baus/datasources.py index 0fceb81ef..7bd9bd1d5 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -397,13 +397,13 @@ def add_drop_helper(col, val): join_col = 'zoningmodcat' print('join_col of zoningmods is {}'.format(join_col)) - return pd.merge(growth_geographies.to_frame().reset_index(), strategy_zoning, on=join_col, how='left').set_index('geo_id') + return pd.merge(growth_geographies.to_frame().reset_index(), strategy_zoning, on=join_col, how='left').set_index('parcel_id') @orca.table(cache=True) def parcels(): df = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/parcels_v0.csv") - return df.set_index("geo_id") + return df.set_index("parcel_id") @orca.table(cache=True) @@ -486,7 +486,7 @@ def manual_edits(): def get_dev_projects_table(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/dev_pipeline_v0c.csv")) - df = df.set_index("geo_id") + df = df.set_index("parcel_id") return df @@ -517,7 +517,7 @@ def development_projects(): def dev_pipeline_strategy_projects(run_setup, development_projects): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0c.csv")) - df = df.set_index("geo_id") + df = df.set_index("parcel_id") if run_setup["dev_pipeline_strategy_projects"]: dp = development_projects.to_frame() @@ -698,13 +698,13 @@ def accessory_units(): @orca.table(cache=True) def nodev_sites(): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0.csv"), index_col="geo_id") + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0.csv"), index_col="parcel_id") return df @orca.table(cache=True) def institutions(): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/institutions.csv"), index_col="geo_id") + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/institutions.csv"), index_col="parcel_id") return df @@ -713,7 +713,7 @@ def institutions(): # this specifies the relationships between tables orca.broadcast('buildings', 'residential_units', cast_index=True, onto_on='building_id') orca.broadcast('residential_units', 'households', cast_index=True, onto_on='unit_id') -orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='geo_id') -orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='geo_id') 
+orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='parcel_id') +orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id') # not defined in urbansim_defaults orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id') \ No newline at end of file diff --git a/baus/models.py b/baus/models.py index 35c9d397d..8459e25b7 100644 --- a/baus/models.py +++ b/baus/models.py @@ -119,7 +119,7 @@ def accessory_units_strategy(year, buildings, parcels, accessory_units): add_units = accessory_units[str(year)] - buildings_juris = misc.reindex(parcels.jurisdiction, buildings.geo_id) + buildings_juris = misc.reindex(parcels.jurisdiction, buildings.parcel_id) res_buildings = buildings_juris[buildings.general_type == "Residential"] add_buildings = groupby_random_choice(res_buildings, add_units) @@ -216,7 +216,7 @@ def jobs_relocation(jobs, employment_relocation_rates, run_setup, employment_rel static_parcels, buildings): # get buildings that are on those parcels - static_buildings = buildings.index[buildings.geo_id.isin(static_parcels)] + static_buildings = buildings.index[buildings.parcel_id.isin(static_parcels)] rates = employment_relocation_rates.local # update the relocation rates with the adjusters if adjusters are being used @@ -246,7 +246,7 @@ def jobs_relocation(jobs, employment_relocation_rates, run_setup, employment_rel def household_relocation(households, household_relocation_rates, run_setup, static_parcels, buildings): # get buildings that are on those parcels - static_buildings = buildings.index[buildings.geo_id.isin(static_parcels)] + static_buildings = buildings.index[buildings.parcel_id.isin(static_parcels)] rates = household_relocation_rates.local # update the relocation rates with the renter protections strategy if applicable @@ -288,7 +288,7 @@ def scheduled_development_events(buildings, development_projects, demolish_event buildings = utils._remove_developed_buildings(buildings.to_frame(buildings.local_columns), demolish, unplace_agents=["households", "jobs"]) - orca.add_injectable('static_parcels', np.append(static_parcels, demolish.loc[demolish.action == 'build', 'geo_id'])) + orca.add_injectable('static_parcels', np.append(static_parcels, demolish.loc[demolish.action == 'build', 'parcel_id'])) orca.add_table("buildings", buildings) buildings = orca.get_table("buildings") print("Demolished %d buildings on parcels with pipeline projects being built" % (l1 - len(buildings))) @@ -312,7 +312,7 @@ def scheduled_development_events(buildings, development_projects, demolish_event new_buildings["SDEM"] = True new_buildings["subsidized"] = False - new_buildings["zone_id"] = misc.reindex(parcels.zone_id, new_buildings.geo_id) + new_buildings["zone_id"] = misc.reindex(parcels.zone_id, new_buildings.parcel_id) if run_setup['run_vmt_fee_res_for_res_strategy'] or ["run_sb743_strategy"]: vmt_fee_categories = orca.get_table("vmt_fee_categories") new_buildings["vmt_res_cat"] = misc.reindex(vmt_fee_categories.res_cat, new_buildings.zone_id) @@ -321,15 +321,15 @@ def scheduled_development_events(buildings, development_projects, demolish_event new_buildings["vmt_nonres_cat"] = misc.reindex(vmt_fee_categories.nonres_cat, new_buildings.zone_id) del new_buildings["zone_id"] - new_buildings["pda_id"] = growth_geographies.pda_id.loc[new_buildings.geo_id].values - new_buildings["tra_id"] = growth_geographies.tra_id.loc[new_buildings.geo_id].values - new_buildings["ppa_id"] = growth_geographies.ppa_id.loc[new_buildings.geo_id].values - 
new_buildings["sesit_id"] = growth_geographies.sesit_id.loc[new_buildings.geo_id].values - new_buildings["coc_id"] = growth_geographies.coc_id.loc[new_buildings.geo_id].values - new_buildings["juris_tra"] = growth_geographies.juris_tra.loc[new_buildings.geo_id].values - new_buildings["juris_ppa"] = growth_geographies.juris_ppa.loc[new_buildings.geo_id].values - new_buildings["juris_sesit"] = growth_geographies.juris_sesit.loc[new_buildings.geo_id].values - new_buildings["juris_coc"] = growth_geographies.juris_coc.loc[new_buildings.geo_id].values + new_buildings["pda_id"] = growth_geographies.pda_id.loc[new_buildings.parcel_id].values + new_buildings["tra_id"] = growth_geographies.tra_id.loc[new_buildings.parcel_id].values + new_buildings["ppa_id"] = growth_geographies.ppa_id.loc[new_buildings.parcel_id].values + new_buildings["sesit_id"] = growth_geographies.sesit_id.loc[new_buildings.parcel_id].values + new_buildings["coc_id"] = growth_geographies.coc_id.loc[new_buildings.parcel_id].values + new_buildings["juris_tra"] = growth_geographies.juris_tra.loc[new_buildings.parcel_id].values + new_buildings["juris_ppa"] = growth_geographies.juris_ppa.loc[new_buildings.parcel_id].values + new_buildings["juris_sesit"] = growth_geographies.juris_sesit.loc[new_buildings.parcel_id].values + new_buildings["juris_coc"] = growth_geographies.juris_coc.loc[new_buildings.parcel_id].values summary.add_parcel_output(new_buildings) @@ -401,7 +401,7 @@ def add_extra_columns_func(df): if "parcel_size" not in df: df["parcel_size"] = \ - orca.get_table("parcels").parcel_size.loc[df.geo_id] + orca.get_table("parcels").parcel_size.loc[df.parcel_id] if orca.is_injectable("year") and "year_built" not in df: df["year_built"] = orca.get_injectable("year") @@ -625,7 +625,7 @@ def retail_developer(jobs, buildings, parcels, nodes, feasibility, target -= d.non_residential_sqft # add redeveloped sqft to target - filt = "general_type == 'Retail' and geo_id == %d" % d["geo_id"] + filt = "general_type == 'Retail' and parcel_id == %d" % d["parcel_id"] target += bldgs.query(filt).non_residential_sqft.sum() devs.append(d) @@ -799,7 +799,7 @@ def developer_reprocess(buildings, year, years_per_iter, jobs, print("Attempting to add ground floor retail to %d devs" % len(new_buildings)) retail = parcel_is_allowed_func("retail") - new_buildings = new_buildings[retail.loc[new_buildings.geo_id].values] + new_buildings = new_buildings[retail.loc[new_buildings.parcel_id].values] print("Disallowing dev on these parcels:") print(" %d devs left after retail disallowed" % len(new_buildings)) @@ -821,7 +821,7 @@ def developer_reprocess(buildings, year, years_per_iter, jobs, # retail in areas that are underserved right now - this is defined as # the location where the retail ratio (ratio of income to retail sqft) # is greater than the median - ratio = parcels.retail_ratio.loc[new_buildings.geo_id] + ratio = parcels.retail_ratio.loc[new_buildings.parcel_id] new_buildings = new_buildings[ratio.values > ratio.median()] print("Adding %d sqft of ground floor retail in %d locations" % @@ -856,8 +856,8 @@ def proportional_job_allocation(): # institutions and not subject to the market # get buildings on this parcel - buildings = orca.get_table("buildings").to_frame(["geo_id", "job_spaces", "zone_id", "year_built"]).\ - query("geo_id == %d" % geo_id) + buildings = orca.get_table("buildings").to_frame(["parcel_id", "job_spaces", "zone_id", "year_built"]).\ + query("parcel_id == %d" % parcel_id) # get jobs in those buildings all_jobs = 
orca.get_table("jobs").local @@ -882,7 +882,7 @@ def proportional_job_allocation(): # make sure index is incrementing new_jobs.index = new_jobs.index + 1 + np.max(all_jobs.index.values) - print("Adding {} new jobs to parcel {} with proportional model".format(num_new_jobs, geo_id)) + print("Adding {} new jobs to parcel {} with proportional model".format(num_new_jobs, parcel_id)) print(new_jobs.head()) all_jobs = all_jobs.append(new_jobs) orca.add_table("jobs", all_jobs) @@ -890,8 +890,8 @@ def proportional_job_allocation(): @orca.step() def static_parcel_proportional_job_allocation(static_parcels): - for geo_id in static_parcels: - proportional_job_allocation(geo_id) + for parcel_id in static_parcels: + proportional_job_allocation(parcel_id) def make_network(name, weight_col, max_distance): diff --git a/baus/postprocessing.py b/baus/postprocessing.py index d359f479d..647c5a4f6 100644 --- a/baus/postprocessing.py +++ b/baus/postprocessing.py @@ -532,13 +532,13 @@ def GEO_SUMMARY_LOADER(run_num, geo, parcel_baseyear, parcel_endyear): zoningtag = 'zoningmodcat' - parcel_baseyear = parcel_baseyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', + parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', 'residential_units','deed_restricted_units', 'inclusionary_units', 'subsidized_units','preserved_units']] - parcel_endyear = parcel_endyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', + parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', 'residential_units','deed_restricted_units','inclusionary_units', 'subsidized_units','preserved_units','juris',zoningtag]] - parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'geo_id', how = 'left').fillna(0) + parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0) if 0 in parcel_data.juris.values: dropindex = parcel_data[parcel_data['juris'] == 0].index parcel_data.drop(dropindex,inplace = True) @@ -563,13 +563,13 @@ def TWO_GEO_SUMMARY_LOADER(run_num, geo1, geo2, parcel_baseyear, parcel_endyear) zoningtag = 'zoningmodcat' - parcel_baseyear = parcel_baseyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', + parcel_baseyear = parcel_baseyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', 'residential_units','deed_restricted_units', 'inclusionary_units', 'subsidized_units', 'preserved_units']] - parcel_endyear = parcel_endyear[['geo_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', + parcel_endyear = parcel_endyear[['parcel_id','tothh','totemp', 'hhq1','hhq2','hhq3','hhq4', 'residential_units','deed_restricted_units','inclusionary_units', 'subsidized_units','preserved_units','juris',zoningtag]] - parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'geo_id', how = 'left').fillna(0) + parcel_data = parcel_baseyear.merge(parcel_endyear, on = 'parcel_id', how = 'left').fillna(0) if 0 in parcel_data.juris.values: dropindex = parcel_data[parcel_data['juris'] == 0].index parcel_data.drop(dropindex,inplace = True) diff --git a/baus/subsidies.py b/baus/subsidies.py index ea28c3214..c1ae3d61f 100644 --- a/baus/subsidies.py +++ b/baus/subsidies.py @@ -63,8 +63,8 @@ def preserve_affordable(preservation, residential_units, travel_model_zones, bui growth_geogs = growth_geographies.to_frame() tm_geogs = travel_model_zones.to_frame() - res_units = res_units.merge(bldgs[['geo_id']], left_on='building_id', right_index=True, how='left').\ - merge(growth_geogs[['gg_id', 'sesit_id', 'tra_id', 'juris']], 
left_on='geo_id', right_index=True, how='left').\ + res_units = res_units.merge(bldgs[['parcel_id']], left_on='building_id', right_index=True, how='left').\ + merge(growth_geogs[['gg_id', 'sesit_id', 'tra_id', 'juris']], left_on='parcel_id', right_index=True, how='left').\ merge(tm_geogs, left_on='taz_tm1', right_index=True, how='left') s = preservation["housing_preservation"]["settings"] @@ -392,7 +392,7 @@ def calculate_vmt_fees(run_setup, account_strategies, year, coffer, summary, yea total_fees = 0 if run_setup["run_vmt_fee_com_for_com_strategy"]: # this step is only needed if summary_output doesn't get a county column - df = df.merge(orca.get_table("parcels").to_frame(), on='geo_id', columns=['county']) + df = df.merge(orca.get_table("parcels").to_frame(), on='parcel_id', columns=['county']) # assign fee to parcels based on county for county in df.county.unique(): df.loc[df["county"] == county, "com_for_com_fees"] = df.vmt_nonres_cat.map(vmt_settings["com_for_com_fee_amounts"][county]) @@ -422,7 +422,7 @@ def calculate_jobs_housing_fees(account_strategies, year, coffer, summary, years for key, acct in jobs_housing_settings.items(): # this step is only needed if summary_output doesn't get a juris county column - df = df.merge(orca.get_table("parcels").to_frame(), on='geo_id', columns=['jurisdiction', 'county']) + df = df.merge(orca.get_table("parcels").to_frame(), on='parcel_id', columns=['jurisdiction', 'county']) # calculate jobs-housing fees for each county's acct df_sub = df.loc[df.county == acct["county_name"]] @@ -464,7 +464,7 @@ def subsidized_office_developer(feasibility, coffer, formula, year, add_extra_co # in order off the top feasibility = feasibility.sort_values(['max_profit_per_sqft']) - # make geo_id available + # make parcel_id available feasibility = feasibility.reset_index() print("%.0f subsidy with %d developments to choose from" % (total_subsidy, len(feasibility))) @@ -498,7 +498,7 @@ def subsidized_office_developer(feasibility, coffer, formula, year, add_extra_co "non_residential_sqft": d["non_residential_sqft"], "juris": d["juris"], "tra_id": d["tra_id"], - "geo_id": d["geo_id"], + "parcel_id": d["parcel_id"], "index": dev_id } diff --git a/baus/summaries.py b/baus/summaries.py index 46d1ee1e0..36a432393 100644 --- a/baus/summaries.py +++ b/baus/summaries.py @@ -45,7 +45,7 @@ def write(s): def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parcels_zoning_calculations, summary, parcels_geography, new_tpp_id, residential_units, mapping, travel_model_zones): - hh_by_subregion = misc.reindex(travel_model_zones.subregion, households.geo_id).value_counts() + hh_by_subregion = misc.reindex(travel_model_zones.subregion, households.parcel_id).value_counts() # Cols for Draft/Final Blueprint and EIR geographies households_df = orca.merge_tables('households', [parcels_geography, buildings, households], @@ -66,7 +66,7 @@ def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parc # round to nearest 100s hhincome_by_insesit = (hhincome_by_insesit/100).round()*100 - jobs_by_subregion = misc.reindex(travel_model_zones.subregion, jobs.geo_id).value_counts() + jobs_by_subregion = misc.reindex(travel_model_zones.subregion, jobs.parcel_id).value_counts() jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs], columns=['pda_id', 'tra_id']) @@ -532,31 +532,31 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce join_col = 'zoningmodcat' if join_col in parcels_geography.to_frame().columns: - parcel_gg = 
parcels_geography.to_frame(["geo_id", join_col, "juris"]) - df = df.merge(parcel_gg, on='geo_id', how='left') + parcel_gg = parcels_geography.to_frame(["parcel_id", join_col, "juris"]) + df = df.merge(parcel_gg, on='parcel_id', how='left') - households_df = orca.merge_tables('households', [buildings, households], columns=['geo_id', 'base_income_quartile']) + households_df = orca.merge_tables('households', [buildings, households], columns=['parcel_id', 'base_income_quartile']) # add households by quartile on each parcel for i in range(1, 5): - df['hhq%d' % i] = households_df[households_df.base_income_quartile == i].geo_id.value_counts() - df["tothh"] = households_df.groupby('geo_id').size() + df['hhq%d' % i] = households_df[households_df.base_income_quartile == i].parcel_id.value_counts() + df["tothh"] = households_df.groupby('parcel_id').size() building_df = orca.merge_tables('buildings', [parcels, buildings], - columns=['geo_id', 'residential_units', 'deed_restricted_units', + columns=['parcel_id', 'residential_units', 'deed_restricted_units', 'preserved_units', 'inclusionary_units', 'subsidized_units']) - df['residential_units'] = building_df.groupby('geo_id')['residential_units'].sum() - df['deed_restricted_units'] = building_df.groupby('geo_id')['deed_restricted_units'].sum() - df['preserved_units'] = building_df.groupby('geo_id')['preserved_units'].sum() - df['inclusionary_units'] = building_df.groupby('geo_id')['inclusionary_units'].sum() - df['subsidized_units'] = building_df.groupby('geo_id')['subsidized_units'].sum() + df['residential_units'] = building_df.groupby('parcel_id')['residential_units'].sum() + df['deed_restricted_units'] = building_df.groupby('parcel_id')['deed_restricted_units'].sum() + df['preserved_units'] = building_df.groupby('parcel_id')['preserved_units'].sum() + df['inclusionary_units'] = building_df.groupby('parcel_id')['inclusionary_units'].sum() + df['subsidized_units'] = building_df.groupby('parcel_id')['subsidized_units'].sum() - jobs_df = orca.merge_tables('jobs', [buildings, jobs], columns=['geo_id', 'empsix']) + jobs_df = orca.merge_tables('jobs', [buildings, jobs], columns=['parcel_id', 'empsix']) # add jobs by empsix category on each parcel for cat in jobs_df.empsix.unique(): - df[cat] = jobs_df[jobs_df.empsix == cat].geo_id.value_counts() - df["totemp"] = jobs_df.groupby('geo_id').size() + df[cat] = jobs_df[jobs_df.empsix == cat].parcel_id.value_counts() + df["totemp"] = jobs_df.groupby('parcel_id').size() df.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % (run_number, year))) @@ -567,7 +567,7 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce # do diff with initial year df2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % - (run_number, initial_year)), index_col="geo_id") + (run_number, initial_year)), index_col="parcel_id") for col in df.columns: @@ -624,7 +624,7 @@ def travel_model_output(parcels, households, jobs, buildings, year, summary, fin tm1_tm2_maz_forecast_inputs, tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls): - parcels = parcels.to_frame().merge(travel_model_zones, on='geo_id', columns=["taz_tm1", "maz_tm2"]) + parcels = parcels.to_frame().merge(travel_model_zones, on='parcel_id', columns=["taz_tm1", "maz_tm2"]) households_df = orca.merge_tables('households', [parcels, buildings, households], @@ -726,7 +726,7 @@ def count_acres_with_mask(mask): summary.write_parcel_output(add_xy={ "xy_table": "parcels", - 
"foreign_key": "geo_id", + "foreign_key": "parcel_id", "x_col": "x", "y_col": "y" }) @@ -1525,10 +1525,10 @@ def write(s): # print out demolished buildings eq_demolish = eq_demolish.to_frame() eq_demolish_taz = misc.reindex(parcels.zone_id, - eq_demolish.geo_id) + eq_demolish.parcel_id) eq_demolish['taz'] = eq_demolish_taz eq_demolish['count'] = 1 - eq_demolish = eq_demolish.drop(['geo_id', 'year_built', + eq_demolish = eq_demolish.drop(['parcel_id', 'year_built', 'redfin_sale_year'], axis=1) eq_demolish = eq_demolish.groupby(['taz']).sum() eq_demolish.to_csv(os.path.join(orca.get_injectable("outputs_dir"), @@ -1559,7 +1559,7 @@ def write(s): if year in [2030, 2035, 2050] and eq: buildings = buildings.to_frame() buildings_taz = misc.reindex(parcels.zone_id, - buildings.geo_id) + buildings.parcel_id) buildings['taz'] = buildings_taz buildings['count'] = 1 buildings = buildings[['taz', 'count', 'residential_units', diff --git a/baus/variables.py b/baus/variables.py index eb42e8430..29d4709b5 100644 --- a/baus/variables.py +++ b/baus/variables.py @@ -152,7 +152,7 @@ def sqft_per_job(buildings, building_sqft_per_job, sqft_per_job_adjusters, telec sqft_per_job = buildings.building_type.fillna("O").map(building_sqft_per_job) - superdistrict = misc.reindex(travel_model_zones.superdistrict, buildings.geo_id) + superdistrict = misc.reindex(travel_model_zones.superdistrict, buildings.parcel_id) # this factor changes all sqft per job according to which superdistrict the building is in - this is so denser areas can have lower sqft per job # this is a simple multiply so a number 1.1 increases the sqft per job by 10% and .9 decreases it by 10% @@ -183,7 +183,7 @@ def price_per_sqft(buildings): @orca.column('buildings', cache=True) def transit_type(buildings, growth_geographies): - return misc.reindex(growth_geographies.tpp_id, buildings.geo_id).\ + return misc.reindex(growth_geographies.tpp_id, buildings.parcel_id).\ reindex(buildings.index).fillna('none') @@ -194,17 +194,17 @@ def unit_price(buildings): @orca.column('buildings', cache=True) def tmnode_id(buildings, parcels): - return misc.reindex(parcels.tmnode_id, buildings.geo_id) + return misc.reindex(parcels.tmnode_id, buildings.parcel_id) @orca.column('buildings') def juris_ave_income(parcels, buildings): - return misc.reindex(parcels.juris_ave_income, buildings.geo_id) + return misc.reindex(parcels.juris_ave_income, buildings.parcel_id) @orca.column('buildings', cache=True) def is_sanfran(parcels, buildings): - return misc.reindex(parcels.is_sanfran, buildings.geo_id) + return misc.reindex(parcels.is_sanfran, buildings.parcel_id) @orca.column('buildings', cache=True) @@ -301,17 +301,17 @@ def residential_price(buildings, residential_units, developer_settings): @orca.column('buildings', cache=True, cache_scope='iteration') def cml(buildings, parcels): - return misc.reindex(parcels.cml, buildings.geo_id) + return misc.reindex(parcels.cml, buildings.parcel_id) @orca.column('buildings', cache=True, cache_scope='iteration') def cnml(buildings, parcels): - return misc.reindex(parcels.cnml, buildings.geo_id) + return misc.reindex(parcels.cnml, buildings.parcel_id) @orca.column('buildings', cache=True, cache_scope='iteration') def combo_logsum(buildings, parcels): - return misc.reindex(parcels.combo_logsum, buildings.geo_id) + return misc.reindex(parcels.combo_logsum, buildings.parcel_id) ##################### @@ -355,7 +355,7 @@ def retail_ratio(parcels, nodes): # attribute on the buildings @orca.column('parcels', cache=True) def stories(buildings): - 
return buildings.stories.groupby(buildings.geo_id).max() + return buildings.stories.groupby(buildings.parcel_id).max() @orca.column('parcels', cache=True) @@ -472,7 +472,7 @@ def juris_coc(parcels, parcels_geography): @orca.column('parcels', cache=True) def superdistrict(parcels, travel_model_zones): - return misc.reindex(travel_model_zones.superdistrict, parcels.geo_id) + return misc.reindex(travel_model_zones.superdistrict, parcels.parcel_id) # perffoot is a dummy indicating the FOOTprint for the PERFormance targets @@ -595,8 +595,8 @@ def parcel_is_allowed(form): @orca.column('parcels') def first_building_type(buildings): - df = buildings.to_frame(columns=['building_type', 'geo_id']) - return df.groupby('geo_id').building_type.first() + df = buildings.to_frame(columns=['building_type', 'parcel_id']) + return df.groupby('parcel_id').building_type.first() @orca.injectable(autocall=False) @@ -624,7 +624,7 @@ def juris_ave_income(households, buildings, parcels_geography, parcels): # missing values with 1800 - for use with development limits @orca.column('parcels') def newest_building(parcels, buildings): - return buildings.year_built.groupby(buildings.geo_id).max().\ + return buildings.year_built.groupby(buildings.parcel_id).max().\ reindex(parcels.index).fillna(1800) @@ -641,7 +641,7 @@ def is_sanfran(parcels_geography, buildings, parcels): @orca.column('parcels', cache=True) def total_non_residential_sqft(parcels, buildings): - return buildings.non_residential_sqft.groupby(buildings.geo_id).sum().\ + return buildings.non_residential_sqft.groupby(buildings.parcel_id).sum().\ reindex(parcels.index).fillna(0) @@ -728,7 +728,7 @@ def max_dua(parcels_zoning_calculations, parcels, zoning_adjusters): @orca.column('parcels') def general_type(parcels, buildings): - s = buildings.general_type.groupby(buildings.geo_id).first() + s = buildings.general_type.groupby(buildings.parcel_id).first() return s.reindex(parcels.index).fillna("Vacant") @@ -817,7 +817,7 @@ def tmnode_id(parcels, net): @orca.column('parcels', cache=True) def subregion(travel_model_zones, parcels): - return misc.reindex(travel_model_zones.subregion, parcels.geo_id) + return misc.reindex(travel_model_zones.subregion, parcels.parcel_id) @orca.column('parcels', cache=True) From 8fd9e81598bd9fc797e6e95a4db1935dfd2b3402 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 27 Jun 2023 13:40:53 -0700 Subject: [PATCH 10/49] update growth_geographies to v0c --- baus/datasources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus/datasources.py b/baus/datasources.py index 7bd9bd1d5..dc92aa917 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -413,7 +413,7 @@ def parcels_zoning_calculations(parcels): @orca.table(cache=True) def growth_geographies(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0b.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0c.csv") @orca.table(cache=True) From 7952241853e488d53e97bdc30b0cf8acd542e111 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 28 Jun 2023 09:56:46 -0700 Subject: [PATCH 11/49] update nodev, streamline parcel_filter component --- baus/datasources.py | 9 +-------- baus/variables.py | 24 +++++++++++++++-------- configs/developer/developer_settings.yaml | 4 +--- docs/configs.md | 1 + 4 files changed, 19 insertions(+), 19 deletions(-) create mode 100644 docs/configs.md diff --git a/baus/datasources.py b/baus/datasources.py index 
dc92aa917..94d13ce9b 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -698,17 +698,10 @@ def accessory_units():
 
 @orca.table(cache=True)
 def nodev_sites():
-    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0.csv"), index_col="parcel_id")
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0c.csv"), index_col="parcel_id")
     return df
 
 
-@orca.table(cache=True)
-def institutions():
-    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/institutions.csv"), index_col="parcel_id")
-    return df
-
-
 # this specifies the relationships between tables
 orca.broadcast('buildings', 'residential_units', cast_index=True, onto_on='building_id')
diff --git a/baus/variables.py b/baus/variables.py
index 29d4709b5..3f26f6b62 100644
--- a/baus/variables.py
+++ b/baus/variables.py
@@ -645,21 +645,29 @@ def total_non_residential_sqft(parcels, buildings):
     reindex(parcels.index).fillna(0)
 
 
-# these are parcels where development is off-limits
 @orca.column('parcels')
-def nodev(parcels, nodev_sites):
-    # the table tells us what category of nodev the various entries are:
-    # manual, sea level rise, preservation area, etc.
+def nodev(parcels, nodev_sites, static_parcels):
+    # start with nodev parcels: parcels where development is off-limits
+    # the input table tells us what category of nodev the various entries are:
+    # protected open space, small single-family lots, etc.
     nd = nodev_sites[nodev_sites["no_dev"] == 1].index
+    # then add all static parcels: a subset of nodev parcels where
+    # jobs and households don't relocate, including:
+    # institutions (where job growth is handled separately) and sea level rise parcels
-    nd.append(static_parcels.index)
+    nd = nd.append(pd.Index(static_parcels))
+    # development projects and buildings less than 20 years old also become off limits in developer_settings.yaml
-    return nd.reindex(parcels.index)
+    return pd.Series(parcels.index.isin(nd).astype('int'), index=parcels.index)
 
 
-# these are parcels where households and jobs don't move
 @orca.injectable()
-def static_parcels(institutions):
-    static_parcels = institutions.index.values
-    # development projects sites then get added to this in the year they are added
+def static_parcels(parcels, nodev_sites):
+    # start with institutions
+    # these are parcels where households and jobs don't move
+    static_parcels = nodev_sites[nodev_sites.institutions_flag == 1].index.values
+    # add sea level rise parcels
+    parcels = parcels.to_frame()
+    static_parcels = np.append(static_parcels, parcels[parcels.slr_nodev == 1].index.values)
+    # development projects also get added to static_parcels in their model step
     return static_parcels
diff --git a/configs/developer/developer_settings.yaml b/configs/developer/developer_settings.yaml
index a222412a8..60f53e36c 100644
--- a/configs/developer/developer_settings.yaml
+++ b/configs/developer/developer_settings.yaml
@@ -7,9 +7,7 @@ profit_vs_return_on_cost_combination_factor: 0.5
 
 # settings for the feasibility model -
 feasibility:
-  parcel_filter: (nodev != 1 and sdem != 1 and oldest_building > 1906
-    and oldest_building_age > 20 and (total_residential_units != 1 or parcel_acres > 1.0)
-    and first_building_type != 'HO' and first_building_type != 'SC')
+  parcel_filter: (nodev != 1 and sdem != 1 and oldest_building_age > 20)
   residential_to_yearly: True
   simple_zoning: True
   # pass_through are columns not used by the developer but which go in the debug output
diff --git a/docs/configs.md b/docs/configs.md
new file mode 100644
index 000000000
+++ b/docs/configs.md @@ -0,0 +1 @@ +rsh.yaml| Residential sales hedonic price model specification. From 90ec562fa4ae4801e325e71c9c54927cd285e969 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 28 Jun 2023 10:00:18 -0700 Subject: [PATCH 12/49] update parcels to v0c --- baus/datasources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus/datasources.py b/baus/datasources.py index 94d13ce9b..6ef8139aa 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -402,7 +402,7 @@ def add_drop_helper(col, val): @orca.table(cache=True) def parcels(): - df = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/parcels_v0.csv") + df = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/parcels_v0c.csv") return df.set_index("parcel_id") From e3194f3e6a473d2e5e206a5b1a7ac4e84a4e91e4 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 29 Jun 2023 06:59:35 -0700 Subject: [PATCH 13/49] update travel_model_zones to v0c --- baus.py | 4 +- baus/datasources.py | 8 +-- baus/variables.py | 6 +- configs/adjusters/sqft_per_job_adjusters.csv | 68 ++++++++++---------- 4 files changed, 40 insertions(+), 46 deletions(-) diff --git a/baus.py b/baus.py index e84f466f8..073b49742 100644 --- a/baus.py +++ b/baus.py @@ -519,8 +519,8 @@ def run_models(MODE): df2 = pd.read_csv((orca.get_injectable("outputs_dir")+"/run%d_superdistrict_summaries_2050.csv") % run_num) df2 = df2.set_index(df2.columns[0]).sort_index() - supnames_df = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/travel_model_zones_v0b.csv")) - supnames = supnames_df.sort_values(['superdistrict'])['superdistrict'].unique() + supnames_df = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/travel_model_zones_v0c.csv")) + supnames = supnames_df.sort_values(['superdistrict_name'])['superdistrict_name'].unique() summary = compare_summary(df1, df2, supnames) with open((orca.get_injectable("outputs_dir") + "/run%d_difference_report.log") % run_num, "w") as f: diff --git a/baus/datasources.py b/baus/datasources.py index 6ef8139aa..958f5d98f 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -312,7 +312,7 @@ def new_tpp_id(): @orca.table(cache=True) def travel_model_zones(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0b.csv")) + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0c.csv")) @orca.table(cache=True) @@ -416,12 +416,6 @@ def growth_geographies(): return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0c.csv") -@orca.table(cache=True) -def parcels_subzone(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'basis_inputs/crosswalks/2020_08_17_parcel_to_taz1454sub.csv'), - usecols=['taz_sub', 'PARCEL_ID', 'county'], dtype={'PARCEL_ID': np.int64}, index_col='PARCEL_ID') - - @orca.table(cache=False) def mandatory_accessibility(year, run_setup): diff --git a/baus/variables.py b/baus/variables.py index 3f26f6b62..602b4ad5d 100644 --- a/baus/variables.py +++ b/baus/variables.py @@ -472,7 +472,7 @@ def juris_coc(parcels, parcels_geography): @orca.column('parcels', cache=True) def superdistrict(parcels, travel_model_zones): - return misc.reindex(travel_model_zones.superdistrict, parcels.parcel_id) + return misc.reindex(travel_model_zones.superdistrict_name, parcels.parcel_id) # perffoot is 
a dummy indicating the FOOTprint for the PERFormance targets
@@ -836,8 +836,8 @@ def vmt_code(parcels, run_setup):
 
 @orca.column('parcels', cache=True)
-def subzone(parcels_subzone):
-    return parcels_subzone.taz_sub
+def subzone(travel_model_zones, parcels):
+    return misc.reindex(travel_model_zones.taz_subzone, parcels.parcel_id)
 
 
 @orca.column('parcels', cache=True, cache_scope='iteration')

diff --git a/configs/adjusters/sqft_per_job_adjusters.csv b/configs/adjusters/sqft_per_job_adjusters.csv
index ce9979d0a..a49167bd1 100644
--- a/configs/adjusters/sqft_per_job_adjusters.csv
+++ b/configs/adjusters/sqft_per_job_adjusters.csv
@@ -1,35 +1,35 @@
 number,name,sqft_per_job_factor
-1,SF NE,1.21208
-2,SF NW,1.03089
-3,SF S SE,1.4545
-4,SF SW,1.69458
-5,Daly City Millbrae,0.96933
-6,San Mateo,0.91995
-7,Redwood City,0.96933
-8,Palo Alto,0.92032
-9,Golden Triangle,0.89362
-10,West San Jose,0.92032
-11,San Jose CBD,0.8536
-12,East San Jose,0.9227
-13,South San Jose,0.969
-14,SE Snta Clara Cnty,1.063
-15,Tri Valley,1.031
-16,Fremont,0.95
-17,S Leandro Hayward,0.96867
-18,Oakland Alameda,0.90858
-19,Berkeley Eville,0.9818
-20,Richmond Pinole,0.96733
-21,Martinez Concord,0.96733
-22,Lamorinda WC,0.968
-23,S Ramon Danville,0.96867
-24,East Contra Costa,1.063
-25,Vallejo Benicia,1.031
-26,Solano Remainder,1.063
-27,Napa City and S,0.96367
-28,Napa Remainder,1
-29,Southern Sonoma,0.965
-30,Santa Rosa Area,0.96333
-31,Northern Sonoma,0.96133
-32,Northern Marin,1.063
-33,Central Marin,0.96767
-34,Southern Marin,0.96767
+1,NE San Francisco,1.21208
+2,NW San Francisco,1.03089
+3,SE San Francisco,1.4545
+4,SW San Francisco,1.69458
+5,N San Mateo,0.96933
+6,C San Mateo,0.91995
+7,S San Mateo,0.96933
+8,NW Santa Clara,0.92032
+9,N Santa Clara,0.89362
+10,SW Santa Clara,0.92032
+11,C Santa Clara,0.8536
+12,NE Santa Clara,0.9227
+13,S Santa Clara,0.969
+14,SE Santa Clara,1.063
+15,E Alameda,1.031
+16,S Alameda,0.95
+17,C Alameda,0.96867
+18,N Alameda,0.90858
+19,NW Alameda,0.9818
+20,W Contra Costa,0.96733
+21,N Contra Costa,0.96733
+22,SW Contra Costa,0.968
+23,S Contra Costa,0.96867
+24,E Contra Costa,1.063
+25,S Solano,1.031
+26,N Solano,1.063
+27,S Napa,0.96367
+28,N Napa,1
+29,S Sonoma,0.965
+30,C Sonoma,0.96333
+31,N Sonoma,0.96133
+32,N Marin,1.063
+33,C Marin,0.96767
+34,S Marin,0.96767

From f2d2c3b63f65a8e95e94470640a0d1169440576b Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Fri, 30 Jun 2023 06:26:03 -0700
Subject: [PATCH 14/49] add a residential_units input table

includes creating units and assigning deed restricted units pre-simulation

---
 baus/datasources.py   |  9 ++---
 baus/preprocessing.py | 45 +------------------------
 baus/ual.py           | 77 +++----------------------------------------
 3 files changed, 8 insertions(+), 123 deletions(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index 958f5d98f..5261759c9 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -471,11 +471,6 @@ def accessibilities_segmentation(year, run_setup):
     return df
 
-@orca.table(cache=True)
-def manual_edits():
-    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/manual_edits.csv"))
-
-
 # shared between demolish and build tables below
 def get_dev_projects_table():
     df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"),
@@ -539,8 +534,8 @@ def buildings():
 
 @orca.table(cache=True)
-def residential_units(store):
-    return print_error_if_not_available(store, 'residential_units_preproc')
+def residential_units():
+    return
os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/residential_units_v0c.csv") @orca.table(cache=True) diff --git a/baus/preprocessing.py b/baus/preprocessing.py index 77d4ee98e..4fdbc88fb 100644 --- a/baus/preprocessing.py +++ b/baus/preprocessing.py @@ -141,49 +141,6 @@ def preproc_households(store): store['households_preproc'] = df -def assign_deed_restricted_units(df, parcels): - - df["deed_restricted_units"] = 0 - - zone_ids = misc.reindex(parcels.zone_id, df.parcel_id).\ - reindex(df.index).fillna(-1) - # sample deed restricted units to match current deed restricted unit - # zone totals - for taz, row in pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/deed_restricted_zone_totals.csv"), - index_col='taz_key').iterrows(): - - cnt = row["units"] - - if cnt <= 0: - continue - - potential_add_locations = df.residential_units[ - (zone_ids == taz) & - (df.residential_units > 0)] - - assert len(potential_add_locations) > 0 - - weights = potential_add_locations / potential_add_locations.sum() - - buildings_ids = potential_add_locations.sample( - cnt, replace=True, weights=weights) - - units = pd.Series(buildings_ids.index.values).value_counts() - df.loc[units.index, "deed_restricted_units"] += units.values - - print("Total deed restricted units after random selection: %d" % - df.deed_restricted_units.sum()) - - df["deed_restricted_units"] = \ - df[["deed_restricted_units", "residential_units"]].min(axis=1) - - print("Total deed restricted units after truncating to res units: %d" % - df.deed_restricted_units.sum()) - - return df - - @orca.step() def correct_baseyear_vacancies(buildings, parcels, jobs, store): # sonoma county has too much vacancy in the buildings so we're @@ -312,4 +269,4 @@ def baseline_data_checks(store): # check res units >= households # check job spaces >= jobs - pass + pass \ No newline at end of file diff --git a/baus/ual.py b/baus/ual.py index b8a3e4d54..5f86d4243 100644 --- a/baus/ual.py +++ b/baus/ual.py @@ -20,64 +20,6 @@ # ############################################################################### - -def _create_empty_units(buildings): - """ - Create a table of empty units corresponding to an input table of buildings. - This function is used (a) in initialization and (b) after the developer - model steps run. - - Parameters - ---------- - buildings : DataFrameWrapper or DataFrame - Must contain an index to be used as the building identifier, and a - count of 'residential_units' which will determine the number of - units to create - - Returns - ------- - df : DataFrame - Table of units, to be processed within an orca step - """ - # The '.astype(int)' deals with a bug (?) 
where the developer model creates - # floating-point unit counts - - s = buildings.residential_units.fillna(0) >=\ - buildings.deed_restricted_units.fillna(0) - - assert np.all(buildings.residential_units.fillna(0) >= - buildings.deed_restricted_units.fillna(0)) - - df = pd.DataFrame({ - 'unit_residential_price': 0.0, - 'unit_residential_rent': 0.0, - 'num_units': 1, - 'building_id': np.repeat( - buildings.index.values, - buildings.residential_units.values.astype(int) - ), - # counter of the units in a building - 'unit_num': np.concatenate([ - np.arange(num_units) - for num_units in buildings.residential_units.values.astype(int) - ]), - # also identify deed restricted units - 'deed_restricted': np.concatenate([ - np.concatenate([ - np.ones(restricted_units), - np.zeros(num_units - restricted_units) - ]) - # iterate over number of units and deed restricted units too - for (num_units, restricted_units) in list(zip( - buildings.residential_units.values.astype(int), - buildings.deed_restricted_units.values.astype(int) - )) - ]) - }).sort_values(by=['building_id', 'unit_num']).reset_index(drop=True) - df.index.name = 'unit_id' - return df - - def match_households_to_units(households, residential_units): """ This initialization step adds a 'unit_id' to the households table and @@ -175,19 +117,9 @@ def assign_tenure_to_units(residential_units, households): @orca.step() -def initialize_residential_units(store): - # this is assumed to run as preprocessing step, after the other - # preprocessing steps - thus we need to get the data from the hdf rather - # than from the orca tables - I contemplated putting this code in the - # preprocessing.py module, but in the end I wanted to keep the residential - # units code together, and also I wanted the github diff to show how few - # lines actually changed here I'm not editing code - just changing where - # this code runs - households = store['households_preproc'] - buildings = store['buildings_preproc'] - - # fan out buildings into units - units = _create_empty_units(buildings) +def initialize_residential_units(residential_units): + + units = residential_units # put households into units based on the building id households = match_households_to_units(households, units) @@ -197,7 +129,8 @@ def initialize_residential_units(store): # write to the hdfstore store['households_preproc'] = households - store['residential_units_preproc'] = units + + @orca.step() From 8bdf062fb6ccf97ba51773e1775ffbc6c772f90a Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Fri, 30 Jun 2023 08:04:26 -0700 Subject: [PATCH 15/49] add superdistrict names in summaries only --- baus/summaries.py | 5 ++++- baus/variables.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/baus/summaries.py b/baus/summaries.py index 36a432393..0ca6ed012 100644 --- a/baus/summaries.py +++ b/baus/summaries.py @@ -276,7 +276,7 @@ def diagnostic_output(households, buildings, parcels, taz, jobs, developer_setti @orca.step() -def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year): +def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year, travel_model_zones): # using the following conditional b/c `year` is used to pull a column # from a csv based on a string of the year in add_population() # and in add_employment() and 2009 is the @@ -389,6 +389,9 @@ def geographic_summary(parcels, households, jobs, buildings, run_setup, run_numb summary_table = summary_table.sort_index() + if 
geography == 'superdistrict': + summary_table["superdistrict_name"] = travel_model_zones.sort_values(['superdistrict'])['superdistrict_name'].unique() + if base is False: summary_csv = os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_summaries_{}.csv").\ format(run_number, geography, year) diff --git a/baus/variables.py b/baus/variables.py index 602b4ad5d..2ca9041a9 100644 --- a/baus/variables.py +++ b/baus/variables.py @@ -472,7 +472,7 @@ def juris_coc(parcels, parcels_geography): @orca.column('parcels', cache=True) def superdistrict(parcels, travel_model_zones): - return misc.reindex(travel_model_zones.superdistrict_name, parcels.parcel_id) + return misc.reindex(travel_model_zones.superdistrict, parcels.parcel_id) # perffoot is a dummy indicating the FOOTprint for the PERFormance targets From a1de6d5894cc4284f47c6bafaaa3f2308e8629f1 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Fri, 30 Jun 2023 10:18:51 -0700 Subject: [PATCH 16/49] one more sd fix --- baus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus.py b/baus.py index 073b49742..31ec07eb2 100644 --- a/baus.py +++ b/baus.py @@ -520,7 +520,7 @@ def run_models(MODE): df2 = df2.set_index(df2.columns[0]).sort_index() supnames_df = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/travel_model_zones_v0c.csv")) - supnames = supnames_df.sort_values(['superdistrict_name'])['superdistrict_name'].unique() + supnames = supnames_df.sort_values(['superdistrict'])['superdistrict_name'].unique() summary = compare_summary(df1, df2, supnames) with open((orca.get_injectable("outputs_dir") + "/run%d_difference_report.log") % run_num, "w") as f: From 66342e73301dcb21cf08e983b36bc4411a336947 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Mon, 3 Jul 2023 07:41:37 -0700 Subject: [PATCH 17/49] reverse accidental docs update --- docs/configs.md | 1 - 1 file changed, 1 deletion(-) delete mode 100644 docs/configs.md diff --git a/docs/configs.md b/docs/configs.md deleted file mode 100644 index ad82a753a..000000000 --- a/docs/configs.md +++ /dev/null @@ -1 +0,0 @@ -rsh.yaml| Residential sales hedonic price model specification. 
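The commits above replace the old per-zone lookup tables with a single travel_model_zones crosswalk that is broadcast onto parcels through urbansim's misc.reindex. A minimal pandas-only sketch of that pattern with toy data (the column names follow travel_model_zones_v0c.csv as it is used in the diffs; the values are illustrative, not real data):

    import pandas as pd

    # toy crosswalk in the shape of travel_model_zones_v0c.csv:
    # one row per parcel, tagged with travel model geographies
    travel_model_zones = pd.DataFrame({
        "parcel_id": [1, 2, 3, 4],
        "superdistrict": [1, 1, 2, 2],
        "superdistrict_name": ["NE San Francisco", "NE San Francisco",
                               "NW San Francisco", "NW San Francisco"],
    }).set_index("parcel_id")

    parcels = pd.DataFrame(index=pd.Index([1, 2, 3, 4], name="parcel_id"))

    # misc.reindex(series, labels) is essentially a label-based lookup,
    # so the parcel-level column reduces to:
    superdistrict = travel_model_zones.superdistrict.reindex(parcels.index)

    # the summaries then recover one display name per superdistrict id,
    # sorted by id, mirroring the supnames logic in baus.py
    supnames = (travel_model_zones.sort_values("superdistrict")
                ["superdistrict_name"].unique())

Under this pattern a switch between the id column and the name column only has to happen where the crosswalk column is selected, which is exactly what PATCH 15 and PATCH 16 adjust.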
From 239aab63b3df475017541feb5cd712e17ff68374 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 19 Jul 2023 12:30:48 -0700 Subject: [PATCH 18/49] fully remove the pre-processing script with the new data --- baus.py | 9 -- baus/datasources.py | 1 - baus/preprocessing.py | 272 ------------------------------------------ 3 files changed, 282 deletions(-) diff --git a/baus.py b/baus.py index 31ec07eb2..38db6ae12 100644 --- a/baus.py +++ b/baus.py @@ -376,15 +376,6 @@ def get_baseyear_models(): def run_models(MODE): - if MODE == "preprocessing": - - orca.run([ - "preproc_jobs", - "preproc_households", - "preproc_buildings", - "initialize_residential_units" - ]) - elif MODE == "fetch_data": orca.run(["fetch_from_s3"]) diff --git a/baus/datasources.py b/baus/datasources.py index 5261759c9..021018df6 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -7,7 +7,6 @@ from urbansim_defaults import utils from urbansim.utils import misc import orca -from baus import preprocessing from baus.utils import nearest_neighbor import yaml diff --git a/baus/preprocessing.py b/baus/preprocessing.py index 4fdbc88fb..e69de29bb 100644 --- a/baus/preprocessing.py +++ b/baus/preprocessing.py @@ -1,272 +0,0 @@ -from __future__ import print_function - -import orca -import pandas as pd -from urbansim.utils import misc -from baus.validation import assert_series_equal - - -# TO ADD: Housing Unit imputation -# We want to match the target in baseyear_taz_controls.csv - -# TO ADD: Nonresidential space imputation -# We want to match the target in baseyear_taz_controls.csv - - -# the way this works is there is an orca step to do jobs allocation, which -# reads base year totals and creates jobs and allocates them to buildings, -# and writes it back to the h5. then the actual jobs table above just reads -# the auto-allocated version from the h5. 
was hoping to just do allocation -# on the fly but it takes about 4 minutes so way to long to do on the fly - - -def allocate_jobs(baseyear_taz_controls, mapping, buildings, parcels): - # this does a new assignment from the controls to the buildings - - # first disaggregate the job totals - sector_map = mapping["naics_to_empsix"] - jobs = [] - for taz, row in baseyear_taz_controls.local.iterrows(): - for sector_col, num in row.iteritems(): - - # not a sector total - if not sector_col.startswith("emp_sec"): - continue - - # get integer sector id - sector_id = int(''.join(c for c in sector_col if c.isdigit())) - sector_name = sector_map[sector_id] - - jobs += [[sector_id, sector_name, taz, -1]] * int(num) - - df = pd.DataFrame(jobs, columns=[ - 'sector_id', 'empsix', 'taz', 'building_id']) - - zone_id = misc.reindex(parcels.zone_id, buildings.parcel_id) - - # just do random assignment weighted by job spaces - we'll then - # fill in the job_spaces if overfilled in the next step (code - # has existed in urbansim for a while) - for taz, cnt in df.groupby('taz').size().iteritems(): - - potential_add_locations = buildings.non_residential_sqft[ - (zone_id == taz) & - (buildings.non_residential_sqft > 0)] - - if len(potential_add_locations) == 0: - # if no non-res buildings, put jobs in res buildings - potential_add_locations = buildings.building_sqft[ - zone_id == taz] - - weights = potential_add_locations / potential_add_locations.sum() - - if len(potential_add_locations) > 0: - buildings_ids = potential_add_locations.sample( - cnt, replace=True, weights=weights) - - df["building_id"][df.taz == taz] = buildings_ids.index.values - - else: - # no locations for jobs; needs to be dealt with on the data side - print("ERROR in TAZ {}: {} jobs, {} potential locations".format( - taz, cnt, len(potential_add_locations))) - - s = zone_id.loc[df.building_id].value_counts() - # assert that we at least got the total employment right after assignment - # 07/27/2020 ET: re-enabling this assertion - # see: https://github.com/BayAreaMetro/bayarea_urbansim/issues/199 - assert_series_equal(baseyear_taz_controls.emp_tot, s) - print("Jobs to assign: {}".format(baseyear_taz_controls.emp_tot.sum())) - print("Jobs assigned: {}".format(s.sum())) - - return df - - -@orca.step() -def move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs_df): - # need to move jobs from portola valley to san mateo county - NUM_IN_PORTOLA = 1500 - - juris = misc.reindex(parcels.juris, misc.reindex(buildings.parcel_id, jobs_df.building_id)) - - # find jobs in portols valley to move - portola = jobs_df[juris == "Portola Valley"] - move = portola.sample(len(portola) - NUM_IN_PORTOLA) - - # find places in san mateo to which to move them - san_mateo = jobs_df[juris == "San Mateo County"] - move_to = san_mateo.sample(len(move)) - - jobs_df.loc[move.index, "building_id"] = move_to.building_id.values - - return jobs_df - - -@orca.step() -def preproc_jobs(store, baseyear_taz_controls, mapping, parcels): - buildings = store['buildings'] - - jobs = allocate_jobs(baseyear_taz_controls, mapping, buildings, parcels) - jobs = move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs) - store['jobs_preproc'] = jobs - - -@orca.step() -def preproc_households(store): - - df = store['households'] - - df['tenure'] = df.hownrent.map({1: 'own', 2: 'rent'}) - - # need to keep track of base year income quartiles for use in the - # transition model - even caching doesn't work because when you add - # rows via the transitioning, you automatically 
clear the cache! - # this is pretty nasty and unfortunate - df["base_income_quartile"] = pd.Series(pd.qcut(df.income, 4, labels=False), index=df.index).add(1) - df["base_income_octile"] = pd.Series(pd.qcut(df.income, 8, labels=False), index=df.index).add(1) - - # there are some overrides where we move households around in order - # to match the city totals - in the future we will resynthesize and this - # can go away - this csv is generated by scripts/match_city_totals.py - overrides = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/household_building_id_overrides.csv"), - index_col="household_id").building_id - df.loc[overrides.index, "building_id"] = overrides.values - - # turns out we need 4 more households - new_households = df.loc[[1132542, 1306618, 950630, 886585]].reset_index() - # keep unique index - new_households.index += pd.Series(df.index).max() + 1 - df = df.append(new_households) - - store['households_preproc'] = df - - -@orca.step() -def correct_baseyear_vacancies(buildings, parcels, jobs, store): - # sonoma county has too much vacancy in the buildings so we're - # going to lower it a bit to match job totals - I'm doing it here - # as opposed to in datasources as it requires registered orca - # variables - - ''' - These are the original vacancies - Alameda 0.607865 - Contra Costa 0.464277 - Marin 0.326655 - Napa 0.427900 - San Francisco 0.714938 - San Mateo 0.285090 - Santa Clara 0.368031 - Solano 0.383663 - Sonoma 0.434263 - ''' - - # get buildings by county - buildings_county = misc.reindex(parcels.county, buildings.parcel_id) - buildings_juris = misc.reindex(parcels.juris, buildings.parcel_id) - - # this is the maximum vacancy you can have any a building so it NOT the - # same thing as setting the vacancy for the entire county - SURPLUS_VACANCY_COUNTY = buildings_county.map({ - "Alameda": .42, - "Contra Costa": .57, - "Marin": .28, - "Napa": .7, - "San Francisco": .08, - "San Mateo": .4, - "Santa Clara": .32, - "Solano": .53, - "Sonoma": .4 - }).fillna(.2) - - SURPLUS_VACANCY_JURIS = buildings_juris.map({ - "Berkeley": .65, - "Atherton": 0.05, - "Belvedere": 0, - "Corte Madera": 0, - "Cupertino": .1, - "Healdsburg": 0, - "Larkspur": 0, - "Los Altos Hills": 0, - "Los Gatos": 0, - "Monte Sereno": 0, - "Piedmont": 0, - "Portola Valley": 0, - "Ross": 0, - "San Anselmo": 0, - "Saratoga": 0, - "Woodside": 0, - "Alameda": .2 - }) - - SURPLUS_VACANCY = pd.DataFrame([ - SURPLUS_VACANCY_COUNTY, SURPLUS_VACANCY_JURIS]).min() - - # count of jobs by building - job_counts_by_building = jobs.building_id.value_counts().\ - reindex(buildings.index).fillna(0) - - # with SURPLUS_VACANCY vacancy - job_counts_by_building_surplus = \ - (job_counts_by_building * (SURPLUS_VACANCY+1)).astype('int') - - # min of job spaces and vacancy - correct_job_spaces = pd.DataFrame([ - job_counts_by_building_surplus, buildings.job_spaces]).min() - - # convert back to non res sqft because job spaces is computed - correct_non_res_sqft = correct_job_spaces * buildings.sqft_per_job - - buildings.update_col("non_residential_sqft", correct_non_res_sqft) - - jobs_county = misc.reindex(buildings_county, jobs.building_id) - - print("Vacancy rate by county:\n", - buildings.job_spaces.groupby(buildings_county).sum() / - jobs_county.value_counts() - 1.0) - - jobs_juris = misc.reindex(buildings_juris, jobs.building_id) - - s = buildings.job_spaces.groupby(buildings_juris).sum() / \ - jobs_juris.value_counts() - 1.0 - print("Vacancy rate by juris:\n", s.to_string()) - - return buildings - - 
-@orca.step() -def preproc_buildings(store, parcels, manual_edits): - # drop columns we don't needed - df = df.drop(['development_type_id', 'improvement_value', - 'sqft_per_unit', 'nonres_rent_per_sqft', - 'res_price_per_sqft', - 'redfin_home_type', 'costar_property_type' - 'costar_rent'], axis=1) - - # set default redfin sale year to 2012 - df["redfin_sale_year"] = df.redfin_sale_year.fillna(2012) - - # this runs after the others because it needs access to orca-assigned - # columns - in particular is needs access to the non-residential sqft and - # job spaces columns - orca.run(["correct_baseyear_vacancies"]) - - -@orca.step() -def baseline_data_checks(store): - # TODO - - # tests to make sure our baseline data edits worked as expected - - # spot check we match controls for jobs at the zonal level - - # spot check portola has 1500 jobs - - # check manual edits are applied - - # check deed restricted units match totals - - # check res units >= households - - # check job spaces >= jobs - pass \ No newline at end of file From 6e118da0b973befdb97408650a1ee82bf8a8c1d2 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 19 Jul 2023 14:45:27 -0700 Subject: [PATCH 19/49] move residential unit assignment to BASIS --- baus/ual.py | 58 ++--------------------------------------------------- 1 file changed, 2 insertions(+), 56 deletions(-) diff --git a/baus/ual.py b/baus/ual.py index 5f86d4243..a803c7411 100644 --- a/baus/ual.py +++ b/baus/ual.py @@ -20,54 +20,6 @@ # ############################################################################### -def match_households_to_units(households, residential_units): - """ - This initialization step adds a 'unit_id' to the households table and - populates it based on existing assignments of households to buildings. - This also allows us to add a 'vacant_units' count to the residential_units - table. FSF note: this won't work if there are more households in a - building than there are units in that building - make sure not to have - overfull buildings. 
- - Data expectations - ----------------- - - 'households' table has NO column 'unit_id' - - 'households' table has column 'building_id' (int, '-1'-filled, - corresponds to index of 'buildings' table) - - 'residential_units' table has an index that serves as its id, - and following columns: - - 'building_id' (int, non-missing, corresponds to index of - 'buildings' table) - - 'unit_num' (int, non-missing, unique within building) - - Results - ------- - - adds following column to 'households' table: - - 'unit_id' (int, '-1'-filled, corresponds to index of - 'residential_units' table) - """ - units = residential_units - hh = households - - # This code block is from Fletcher - unit_lookup = units.reset_index().set_index(['building_id', 'unit_num']) - hh = hh.sort_values(by=['building_id'], ascending=True) - - building_counts = hh.building_id.value_counts().sort_index() - hh['unit_num'] = np.concatenate( - [np.arange(i) for i in building_counts.values]) - - unplaced = hh[hh.building_id == -1].index - placed = hh[hh.building_id != -1].index - - indexes = [tuple(t) for t in - hh.loc[placed, ['building_id', 'unit_num']].values] - - hh.loc[placed, 'unit_id'] = unit_lookup.loc[indexes].unit_id.values - hh.loc[unplaced, 'unit_id'] = -1 - - return hh - def assign_tenure_to_units(residential_units, households): """ @@ -119,17 +71,11 @@ def assign_tenure_to_units(residential_units, households): @orca.step() def initialize_residential_units(residential_units): - units = residential_units - - # put households into units based on the building id - households = match_households_to_units(households, units) - - # then assign tenure to units based on the households in them + # assign tenure to units based on the households in them units = assign_tenure_to_units(units, households) # write to the hdfstore - store['households_preproc'] = households - + store['residential_units'] = units From 7cc2f44a416fe47488b3a0b8a084d2fced5da683 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 20 Jul 2023 07:19:44 -0700 Subject: [PATCH 20/49] fully remove paths.yaml, mapping.yaml, and update datastore --- baus/datasources.py | 64 +++++-------------- baus/models.py | 10 +-- baus/summaries.py | 2 +- baus/ual.py | 4 +- baus/validation.py | 8 +-- baus/variables.py | 6 +- configs/developer/developer_settings.yaml | 75 +++++++++++++++++++++++ 7 files changed, 104 insertions(+), 65 deletions(-) diff --git a/baus/datasources.py b/baus/datasources.py index 021018df6..db84c0521 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -35,12 +35,6 @@ def outputs_dir(run_setup): return run_setup['outputs_dir'] -@orca.injectable('paths', cache=True) -def paths(): - with open(os.path.join(misc.configs_dir(), "paths.yaml")) as f: - return yaml.load(f) - - @orca.injectable('accessibility_settings', cache=True) def accessibility_settings(): with open(os.path.join(misc.configs_dir(), "accessibility/accessibility_settings.yaml")) as f: @@ -113,12 +107,6 @@ def preservation(): return yaml.load(f) -@orca.injectable('mapping', cache=True) -def mapping(): - with open(os.path.join(misc.configs_dir(), "mapping.yaml")) as f: - return yaml.load(f) - - @orca.injectable('cost_shifters', cache=True) def cost_shifters(): with open(os.path.join(misc.configs_dir(), "adjusters/cost_shifters.yaml")) as f: @@ -142,15 +130,15 @@ def price_settings(): # this just adds some of the BAUS settings to a master "settings", since the urbansim code looks for them there @orca.injectable("settings") -def settings(mapping, 
transition_relocation_settings): - settings = mapping.copy() +def settings(developer_settings, transition_relocation_settings): + settings = developer_settings.copy() settings.update(transition_relocation_settings) return settings @orca.injectable("building_type_map") -def building_type_map(mapping): - return mapping["building_type_map"] +def building_type_map(developer_settings): + return developer_settings["building_type_map"] @orca.injectable('year') @@ -174,8 +162,8 @@ def final_year(): @orca.injectable(cache=True) -def store(paths): - return pd.HDFStore(os.path.join(orca.get_injectable("inputs_dir"), paths["store"])) +def store(): + return pd.HDFStore(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/data_store.h5")) @orca.injectable(cache=True) @@ -230,24 +218,6 @@ def building_sqft_per_job(developer_settings): return developer_settings['building_sqft_per_job'] -@orca.step() -def fetch_from_s3(paths): - import boto - # fetch files from s3 based on config in settings.yaml - s3_settings = paths["s3_settings"] - - conn = boto.connect_s3() - bucket = conn.get_bucket(s3_settings["bucket"], validate=False) - - for file in s3_settings["files"]: - file = os.path.join("data", file) - if os.path.exists(file): - continue - print("Downloading " + file) - key = bucket.get_key(file, validate=False) - key.get_contents_to_filename(file) - - # key locations in the Bay Area for use as attractions in the models @orca.table(cache=True) def landmarks(): @@ -256,13 +226,7 @@ def landmarks(): @orca.table(cache=True) -def baseyear_taz_controls(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/baseyear_taz_controls.csv"), - dtype={'taz1454': np.int64}, index_col="taz1454") - - -@orca.table(cache=True) -def base_year_summary_taz(mapping): +def base_year_summary_taz(): df = pd.read_csv(os.path.join('output', 'baseyear_taz_summaries_2010.csv'), dtype={'taz1454': np.int64}, index_col="taz_tm1") return df @@ -376,11 +340,11 @@ def tm1_tm2_maz_forecast_inputs(tm1_tm2_regional_demographic_forecast): @orca.table(cache=True) -def zoning_strategy(growth_geographies, mapping): +def zoning_strategy(growth_geographies, developer_settings): strategy_zoning = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'plan_strategies/zoning_mods.csv')) - for k in mapping["building_type_map"].keys(): + for k in developer_settings["building_type_map"].keys(): strategy_zoning[k] = np.nan def add_drop_helper(col, val): @@ -401,7 +365,7 @@ def add_drop_helper(col, val): @orca.table(cache=True) def parcels(): - df = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/parcels_v0c.csv") + df = store['parcels'] return df.set_index("parcel_id") @@ -519,22 +483,22 @@ def dev_pipeline_strategy_projects(run_setup, development_projects): @orca.table(cache=True) def jobs(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/jobs.csv") + return store['jobs'] @orca.table(cache=True) def households(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/households.csv") + return store['households'] @orca.table(cache=True) def buildings(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/buildings_v0c.csv") + return store['buildings'] @orca.table(cache=True) def residential_units(): - return os.path.join(orca.get_injectable("inputs_dir"), 
"basis_inputs/parcels_buildings_agents/residential_units_v0c.csv") + return store['residential_units'] @orca.table(cache=True) diff --git a/baus/models.py b/baus/models.py index 8459e25b7..c18c9330e 100644 --- a/baus/models.py +++ b/baus/models.py @@ -274,8 +274,8 @@ def household_relocation(households, household_relocation_rates, run_setup, stat # this deviates from the step in urbansim_defaults when there are multiple projects on a parcel: # instead of redeveloping the parcel each time, it adds each building to the parcel @orca.step() -def scheduled_development_events(buildings, development_projects, demolish_events, summary, year, parcels, mapping, years_per_iter, - growth_geographies, building_sqft_per_job, static_parcels, base_year, run_setup): +def scheduled_development_events(buildings, development_projects, demolish_events, summary, year, parcels, developer_settings, + years_per_iter, growth_geographies, building_sqft_per_job, static_parcels, base_year, run_setup): # first demolish # grab projects from the simulation year and previous four years, except for 2015 which pulls 2015-2010 projects if year == (base_year + years_per_iter): @@ -306,7 +306,7 @@ def scheduled_development_events(buildings, development_projects, demolish_event new_buildings = utils.scheduled_development_events(buildings, dps, remove_developed_buildings=False, unplace_agents=['households', 'jobs']) - new_buildings["form"] = new_buildings.building_type.map(mapping['building_type_map']).str.lower() + new_buildings["form"] = new_buildings.building_type.map(developer_settings['building_type_map']).str.lower() new_buildings["job_spaces"] = new_buildings.non_residential_sqft / new_buildings.building_type.fillna("OF").map(building_sqft_per_job) new_buildings["job_spaces"] = new_buildings.job_spaces.fillna(0).astype('int') new_buildings["SDEM"] = True @@ -354,7 +354,7 @@ def supply_and_demand_multiplier_func(demand, supply): # specific building type @orca.injectable(autocall=False) def form_to_btype_func(building): - mapping = orca.get_injectable('mapping') + developer_settings = orca.get_injectable('developer_settings') form = building.form dua = building.residential_units / (building.parcel_size / 43560.0) # precise mapping of form to building type for residential @@ -364,7 +364,7 @@ def form_to_btype_func(building): elif dua < 32: return "HT" return "HM" - return mapping["form_to_btype"][form][0] + return developer_settings["form_to_btype"][form][0] @orca.injectable(autocall=False) diff --git a/baus/summaries.py b/baus/summaries.py index 0ca6ed012..d680f8bd1 100644 --- a/baus/summaries.py +++ b/baus/summaries.py @@ -43,7 +43,7 @@ def write(s): @orca.step() def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parcels_zoning_calculations, - summary, parcels_geography, new_tpp_id, residential_units, mapping, travel_model_zones): + summary, parcels_geography, new_tpp_id, residential_units, travel_model_zones): hh_by_subregion = misc.reindex(travel_model_zones.subregion, households.parcel_id).value_counts() diff --git a/baus/ual.py b/baus/ual.py index a803c7411..fe4503e7a 100644 --- a/baus/ual.py +++ b/baus/ual.py @@ -69,10 +69,10 @@ def assign_tenure_to_units(residential_units, households): @orca.step() -def initialize_residential_units(residential_units): +def initialize_residential_units(residential_units, households): # assign tenure to units based on the households in them - units = assign_tenure_to_units(units, households) + units = assign_tenure_to_units(residential_units, households) # 
write to the hdfstore
     store['residential_units'] = units

diff --git a/baus/validation.py b/baus/validation.py
index 44d9dcc03..f78af707a 100644
--- a/baus/validation.py
+++ b/baus/validation.py
@@ -34,13 +34,13 @@ def check_household_controls(households, household_controls, year):
 
 # make sure the employment controls are currently being matched
-def check_job_controls(jobs, employment_controls, year, mapping):
+def check_job_controls(jobs, employment_controls, year, developer_settings):
     print("Check job controls")
     current_employment_controls = employment_controls.local.loc[year]
     current_employment_controls = current_employment_controls.\
         set_index("empsix_id").number_of_jobs
-    empsix_map = mapping["empsix_name_to_id"]
+    empsix_map = developer_settings["empsix_name_to_id"]
     current_counts = jobs.empsix.map(empsix_map).value_counts()
 
     assert_series_equal(
@@ -109,14 +109,14 @@ def check_unit_ids_match_building_ids(households, residential_units):
 @orca.step()
 def simulation_validation(
         parcels, buildings, households, jobs, residential_units, year,
-        household_controls, employment_controls, mapping):
+        household_controls, employment_controls, developer_settings):
 
     # this does a save and restore state for debugging
     # d = save_and_restore_state(locals())
     # for k in d.keys():
     #    locals()[k].local = d[k]
 
-    check_job_controls(jobs, employment_controls, year, mapping)
+    check_job_controls(jobs, employment_controls, year, developer_settings)
 
     check_household_controls(households, household_controls, year)

diff --git a/baus/variables.py b/baus/variables.py
index 2ca9041a9..85bd7e2d9 100644
--- a/baus/variables.py
+++ b/baus/variables.py
@@ -560,8 +560,8 @@ def parcel_average_price(use, quantile=.5):
 @orca.injectable("parcel_is_allowed_func", autocall=False)
 def parcel_is_allowed(form):
     zoning_adjusters = orca.get_injectable("zoning_adjusters")
-    mapping = orca.get_injectable("mapping")
-    form_to_btype = mapping["form_to_btype"]
+    developer_settings = orca.get_injectable("developer_settings")
+    form_to_btype = developer_settings["form_to_btype"]
     # we have zoning by building type but want
     # to know if specific forms are allowed
@@ -601,7 +601,7 @@ def first_building_type(buildings):
 
 @orca.injectable(autocall=False)
 def parcel_first_building_type_is(form):
-    form_to_btype = orca.get_injectable('mapping')["form_to_btype"]
+    form_to_btype = orca.get_injectable('developer_settings')["form_to_btype"]
     parcels = orca.get_table('parcels')
     return parcels.first_building_type.isin(form_to_btype[form])

diff --git a/configs/developer/developer_settings.yaml b/configs/developer/developer_settings.yaml
index 60f53e36c..c365c433f 100644
--- a/configs/developer/developer_settings.yaml
+++ b/configs/developer/developer_settings.yaml
@@ -130,6 +130,81 @@ form_to_btype:
   - MR
 
+# this maps Synthicity's 25 employment categories to
+# the six major employment categories traditionally
+# used by MTC and ABAG for use in output to the Travel Model
+naics_to_empsix:
+  10: AGREMPN
+  11: AGREMPN
+  21: AGREMPN
+  22: MWTEMPN
+  23: OTHEMPN
+  31: MWTEMPN
+  3133: MWTEMPN
+  32: MWTEMPN
+  33: MWTEMPN
+  42: MWTEMPN
+  44: RETEMPN
+  4445: RETEMPN
+  45: RETEMPN
+  48: MWTEMPN
+  4849: MWTEMPN
+  49: MWTEMPN
+  51: OTHEMPN
+  52: FPSEMPN
+  53: FPSEMPN
+  54: FPSEMPN
+  55: FPSEMPN
+  56: FPSEMPN
+  61: HEREMPN
+  62: HEREMPN
+  71: HEREMPN
+  72: HEREMPN
+  81: OTHEMPN
+  91: OTHEMPN
+  92: OTHEMPN
+  99: OTHEMPN
 
 # convert square meters to square feet
 parcel_size_factor: 10.764

From 6c977a090b8177a41ae28b86c6f453eb90192ee0 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Thu, 20 Jul 2023 07:41:34 -0700
Subject: [PATCH 21/49] consolidate years variables

---
 baus.py             | 19 ++++++++++---------
 baus/datasources.py | 20 --------------------
 2 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/baus.py b/baus.py
index 38db6ae12..042cff315 100644
--- a/baus.py
+++ b/baus.py
@@ -34,7 +34,7 @@ COMPARE_TO_NO_PROJECT = True
 NO_PROJECT = 611
 
-IN_YEAR, OUT_YEAR = 2010, 2050
+IN_YEAR, OUT_YEAR = 2020, 2050
 
 COMPARE_AGAINST_LAST_KNOWN_GOOD = False
 LAST_KNOWN_GOOD_RUN = 182
@@ -44,8 +44,13 @@
 orca.add_injectable("base_year", IN_YEAR)
 
+orca.add_injectable("final_year", OUT_YEAR)
+
+orca.add_injectable("year", orca.get_injectable("iter_var"))
+
 orca.add_injectable("slack_enabled", SLACK)
 
+
 parser = argparse.ArgumentParser(description='Run UrbanSim models.')
 
 parser.add_argument(
@@ -376,13 +381,9 @@ def get_baseyear_models():
 
 def run_models(MODE):
 
-    elif MODE == "fetch_data":
-
-        orca.run(["fetch_from_s3"])
-
-    elif MODE == "debug":
+    if MODE == "debug":
 
-        orca.run(["simulation_validation"], [2010])
+        orca.run(["simulation_validation"], [2020])
 
     elif MODE == "simulation":
 
@@ -411,7 +412,7 @@ def run_models(MODE):
             "hlcm_estimate",         # household lcm
             "elcm_estimate",         # employment lcm
 
-        ], iter_vars=[2010])
+        ], iter_vars=[2020])
 
         # Estimation steps
         '''
@@ -440,7 +441,7 @@ def run_models(MODE):
             "price_vars",
             "subsidized_residential_feasibility"
 
-        ], iter_vars=[2010])
+        ], iter_vars=[2020])
 
         # the whole point of this is to get the feasibility dataframe
         # for debugging

diff --git a/baus/datasources.py b/baus/datasources.py
index db84c0521..1781ccb46 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -141,26 +141,6 @@ def building_type_map(developer_settings):
     return developer_settings["building_type_map"]
 
-@orca.injectable('year')
-def year():
-    try:
-        return orca.get_injectable("iter_var")
-    except Exception as e:
-        pass
-    # if we're not running simulation, return base year
-    return 2014
-
-
-@orca.injectable()
-def initial_year():
-    return 2010
-
-
-@orca.injectable()
-def final_year():
-    return 2050
-
-
 @orca.injectable(cache=True)
 def store():
     return pd.HDFStore(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/data_store.h5"))

From 9bcddb1283abcf608eaff139c797a9ca80ab87f1 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Thu, 20 Jul 2023 11:56:10 -0700
Subject: [PATCH 22/49] update the model base year in the summaries

---
 baus/datasources.py |  6 ------
 baus/summaries.py   | 20 ++++++++++----------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index 1781ccb46..d01ed3b50 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -205,12 +205,6 @@ def landmarks():
                        index_col="name")
 
-@orca.table(cache=True)
-def base_year_summary_taz():
-    df = pd.read_csv(os.path.join('output', 'baseyear_taz_summaries_2010.csv'), dtype={'taz1454': np.int64}, index_col="taz_tm1")
-    return df
-
-
 # non-residential rent data
 @orca.table(cache=True)
 def costar(parcels):

diff --git a/baus/summaries.py b/baus/summaries.py
index d680f8bd1..043de181b 100644
--- a/baus/summaries.py
+++ b/baus/summaries.py
@@ -73,7 +73,7 @@ def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parc
     jobs_by_inpda = jobs_df.pda_id.notnull().value_counts()
     jobs_by_intra =
jobs_df.tra_id.notnull().value_counts() - if year == 2010: + if year == 2020: # save some info for computing growth measures orca.add_injectable("base_year_measures", { "hh_by_subregion": hh_by_subregion, @@ -89,7 +89,7 @@ def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parc try: base_year_measures = orca.get_injectable("base_year_measures") except Exception as e: - # the base year measures don't exist - we didn't run year 2010 + # the base year measures don't exist - we didn't run year 2020 # this can happen when we skip the first year, usually because # we don't want to waste time doing so return @@ -279,10 +279,10 @@ def diagnostic_output(households, buildings, parcels, taz, jobs, developer_setti def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year, travel_model_zones): # using the following conditional b/c `year` is used to pull a column # from a csv based on a string of the year in add_population() - # and in add_employment() and 2009 is the - # 'base'/pre-simulation year, as is the 2010 value in the csv. - if year == 2009: - year = 2010 + # and in add_employment() and 2019 is the + # 'base'/pre-simulation year, as is the 2020 value in the csv. + if year == 2019: + year = 2020 base = True else: base = False @@ -315,7 +315,7 @@ def geographic_summary(parcels, households, jobs, buildings, run_setup, run_numb # # append Draft/Final Blueprint strategy geographis # geographies.extend(['pda_id', 'juris_tra', 'juris_sesit', 'juris_ppa']) - if year in [2010, 2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]: + if year in [2020, 2025, 2030, 2035, 2040, 2045, 2050]: for geography in geographies: @@ -514,7 +514,7 @@ def building_summary(parcels, run_number, year, def parcel_summary(parcels, buildings, households, jobs, run_number, year, parcels_zoning_calculations, initial_year, final_year, parcels_geography): - # if year not in [2010, 2015, 2035, 2050]: + # if year not in [2020, 2035, 2050]: # return df = parcels.to_frame([ @@ -565,7 +565,7 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce # if year == final_year: print('year printed for debug: {}'.format(year)) - if year not in [2010, 2015]: + if not year == 2020: print('calculate diff for year {}'.format(year)) # do diff with initial year @@ -587,7 +587,7 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce # if year == final_year: print('year printed for debug: {}'.format(year)) - if year not in [2010, 2015]: + if not year == 2020: print('calculate diff for year {}'.format(year)) baseyear = 2015 From 1cb57c5ae5673c5a3e6a4210903dccfcc8f2a19c Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 20 Jul 2023 12:40:34 -0700 Subject: [PATCH 23/49] update additional code using the model base year --- baus/models.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/baus/models.py b/baus/models.py index c18c9330e..288632b64 100644 --- a/baus/models.py +++ b/baus/models.py @@ -277,7 +277,7 @@ def household_relocation(households, household_relocation_rates, run_setup, stat def scheduled_development_events(buildings, development_projects, demolish_events, summary, year, parcels, developer_settings, years_per_iter, growth_geographies, building_sqft_per_job, static_parcels, base_year, run_setup): # first demolish - # grab projects from the simulation year and previous four years, except for 2015 which pulls 2015-2010 projects + # grab projects from the simulation year and 
previous four years, except for 2025 which pulls 2025-2020 projects if year == (base_year + years_per_iter): demolish = demolish_events.to_frame().query("%d <= year_built <= %d" % (year - years_per_iter, year)) else: @@ -294,7 +294,7 @@ def scheduled_development_events(buildings, development_projects, demolish_event print("Demolished %d buildings on parcels with pipeline projects being built" % (l1 - len(buildings))) # then build - # grab projects from the simulation year and previous four years, except for 2015 which pulls 2015-2010 projects + # grab projects from the simulation year and previous four years, except for 2025 which pulls 2025-2020 projects if year == (base_year + years_per_iter): dps = development_projects.to_frame().query("%d <= year_built <= %d" % (year - years_per_iter, year)) else: @@ -477,15 +477,15 @@ def residential_developer(feasibility, households, buildings, parcels, year, # and development is lumpy current_total = parcels.total_residential_units[ - (juris_name == juris) & (parcels.newest_building >= 2010)]\ + (juris_name == juris) & (parcels.newest_building >= 2020)]\ .sum() - target = (year - 2010 + 1) * limit - current_total + target = (year - 2020 + 1) * limit - current_total # make sure we don't overshoot the total development of the limit # for the horizon year - for instance, in Half Moon Bay we have # a very low limit and a single development in a far out year can # easily build over the limit for the total simulation - max_target = (final_year - 2010 + 1) * limit - current_total + max_target = (final_year - 2020 + 1) * limit - current_total if target <= 0: continue From 387e23fc711bcef8fdf032277690c7acf9f688b7 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Thu, 20 Jul 2023 13:24:31 -0700 Subject: [PATCH 24/49] remove hardcoding of initial year and final year in model --- baus/models.py | 25 +++++++++++++------------ baus/summaries.py | 36 +++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/baus/models.py b/baus/models.py index 288632b64..aa9a64466 100644 --- a/baus/models.py +++ b/baus/models.py @@ -277,7 +277,8 @@ def household_relocation(households, household_relocation_rates, run_setup, stat def scheduled_development_events(buildings, development_projects, demolish_events, summary, year, parcels, developer_settings, years_per_iter, growth_geographies, building_sqft_per_job, static_parcels, base_year, run_setup): # first demolish - # grab projects from the simulation year and previous four years, except for 2025 which pulls 2025-2020 projects + # grab projects from the simulation year and previous four years, except in the first forecast year + # which also pulls in the base year projects if year == (base_year + years_per_iter): demolish = demolish_events.to_frame().query("%d <= year_built <= %d" % (year - years_per_iter, year)) else: @@ -293,8 +294,8 @@ def scheduled_development_events(buildings, development_projects, demolish_event buildings = orca.get_table("buildings") print("Demolished %d buildings on parcels with pipeline projects being built" % (l1 - len(buildings))) - # then build - # grab projects from the simulation year and previous four years, except for 2025 which pulls 2025-2020 projects + # grab projects from the simulation year and previous four years, except in the first forecast year + # which also pulls in the base year projects if year == (base_year + years_per_iter): dps = development_projects.to_frame().query("%d <= year_built <= %d" % (year - years_per_iter, year)) 
else: @@ -444,7 +445,7 @@ def alt_feasibility(parcels, developer_settings, def residential_developer(feasibility, households, buildings, parcels, year, developer_settings, summary, form_to_btype_func, add_extra_columns_func, parcels_geography, - limits_settings, final_year, run_setup): + limits_settings, final_year, run_setup, base_year): kwargs = developer_settings['residential_developer'] @@ -477,15 +478,15 @@ def residential_developer(feasibility, households, buildings, parcels, year, # and development is lumpy current_total = parcels.total_residential_units[ - (juris_name == juris) & (parcels.newest_building >= 2020)]\ + (juris_name == juris) & (parcels.newest_building >= base_year)]\ .sum() - target = (year - 2020 + 1) * limit - current_total + target = (year - base_year + 1) * limit - current_total # make sure we don't overshoot the total development of the limit # for the horizon year - for instance, in Half Moon Bay we have # a very low limit and a single development in a far out year can # easily build over the limit for the total simulation - max_target = (final_year - 2020 + 1) * limit - current_total + max_target = (final_year - base_year + 1) * limit - current_total if target <= 0: continue @@ -685,7 +686,7 @@ def office_developer(feasibility, jobs, buildings, parcels, year, # now apply limits - limits are assumed to be yearly, apply to an # entire jurisdiction and be in terms of residential_units or # job_spaces - if year > 2015 and typ in limits_settings: + if year > 2025 and typ in limits_settings: juris_name = parcels_geography.juris_name.\ reindex(parcels.index).fillna('Other') @@ -700,9 +701,9 @@ def office_developer(feasibility, jobs, buildings, parcels, year, current_total = parcels.total_job_spaces[ (juris_name == juris) & - (parcels.newest_building > 2015)].sum() + (parcels.newest_building > 2025)].sum() - target = (year - 2015 + 1) * limit - current_total + target = (year - 2025 + 1) * limit - current_total if target <= 0: print("Already met target for juris = %s" % juris) @@ -862,13 +863,13 @@ def proportional_job_allocation(): # get jobs in those buildings all_jobs = orca.get_table("jobs").local jobs = all_jobs[ - all_jobs.building_id.isin(buildings.query("year_built <= 2015").index)] + all_jobs.building_id.isin(buildings.query("year_built <= 2025").index)] # get job distribution by sector for this parcel job_dist = jobs.empsix.value_counts() # only add jobs to new buildings records - for index, building in buildings.query("year_built > 2015").iterrows(): + for index, building in buildings.query("year_built > 2025").iterrows(): num_new_jobs = building.job_spaces - len( all_jobs.query("building_id == %d" % index)) diff --git a/baus/summaries.py b/baus/summaries.py index 043de181b..729abc37a 100644 --- a/baus/summaries.py +++ b/baus/summaries.py @@ -43,7 +43,7 @@ def write(s): @orca.step() def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parcels_zoning_calculations, - summary, parcels_geography, new_tpp_id, residential_units, travel_model_zones): + summary, parcels_geography, new_tpp_id, residential_units, travel_model_zones, base_year): hh_by_subregion = misc.reindex(travel_model_zones.subregion, households.parcel_id).value_counts() @@ -73,7 +73,7 @@ def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parc jobs_by_inpda = jobs_df.pda_id.notnull().value_counts() jobs_by_intra = jobs_df.tra_id.notnull().value_counts() - if year == 2020: + if year == base_year: # save some info for computing growth measures 
     orca.add_injectable("base_year_measures", {
         "hh_by_subregion": hh_by_subregion,
@@ -276,13 +276,14 @@ def diagnostic_output(households, buildings, parcels, taz, jobs, developer_setti
 
 @orca.step()
-def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year, travel_model_zones):
+def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year, travel_model_zones,
+                       base_year):
     # using the following conditional b/c `year` is used to pull a column
     # from a csv based on a string of the year in add_population()
     # and in add_employment() and 2019 is the
     # 'base'/pre-simulation year, as is the 2020 value in the csv.
     if year == 2019:
-        year = 2020
+        year = base_year
         base = True
     else:
         base = False
@@ -315,7 +316,7 @@ def geographic_summary(parcels, households, jobs, buildings, run_setup, run_numb
     # # append Draft/Final Blueprint strategy geographies
     # geographies.extend(['pda_id', 'juris_tra', 'juris_sesit', 'juris_ppa'])
 
-    if year in [2020, 2025, 2030, 2035, 2040, 2045, 2050]:
+    if year in [base_year, 2025, 2030, 2035, 2040, 2045, final_year]:
 
         for geography in geographies:
@@ -411,7 +412,7 @@
             acct.to_frame().to_csv(fname)
 
     if year == final_year:
-        baseyear = 2015
+        baseyear = base_year
         for geography in geographies:
             df_base = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_summaries_{}.csv".\
                                   format(run_number, geography, baseyear)))
@@ -422,7 +423,7 @@
                       format(run_number, geography)), index = False)
 
     # Write Urban Footprint Summary
-    if year in [2010, 2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]:
+    if year in [base_year, 2025, 2030, 2035, 2040, 2045, final_year]:
         # 02 15 2019 ET: Using perffoot there was no greenfield change
         # between 2010 and 2050. Joined the parcels to Urbanized_Footprint
        # instead, which improved the diff.
@@ -512,9 +513,9 @@ def building_summary(parcels, run_number, year,

 @orca.step()
 def parcel_summary(parcels, buildings, households, jobs, run_number, year, parcels_zoning_calculations,
-                   initial_year, final_year, parcels_geography):
+                   initial_year, final_year, parcels_geography, base_year):

-    # if year not in [2020, 2035, 2050]:
+    # if year not in [base_year, 2035, final_year]:
     #     return

     df = parcels.to_frame([
@@ -565,7 +566,7 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce
     # if year == final_year:
     print('year printed for debug: {}'.format(year))
-    if not year == 2020:
+    if not year == base_year:
         print('calculate diff for year {}'.format(year))

         # do diff with initial year
@@ -587,10 +588,10 @@ def parcel_summary(parcels, buildings, households, jobs, run_number, year, parce
     # if year == final_year:
     print('year printed for debug: {}'.format(year))
-    if not year == 2020:
+    if not year == base_year:
         print('calculate diff for year {}'.format(year))

-        baseyear = 2015
+        baseyear = base_year
         df_base = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % (run_number, baseyear)))
         df_final = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv"
                                # % (run_number, final_year)))
                                % (run_number, year)))
@@ -836,7 +837,7 @@ def count_acres_with_mask(mask):
         "run{}_county_summaries_{}.csv").format(run_number, year))

     if year == final_year:
-        baseyear = 2015
+        baseyear = base_year
         df_base = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_taz_summaries_%d.csv" % (run_number, baseyear)))
         df_final = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"),
@@ -860,8 +861,9 @@ def count_acres_with_mask(mask):

 @orca.step()
 def travel_model_2_output(parcels, households, jobs, buildings, maz, year, tm2_emp27_employment_shares, run_number,
                           tm1_tm2_maz_forecast_inputs, tm2_taz2_forecast_inputs, tm2_occupation_shares,
-                          tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls):
-    if year not in [2010, 2015, 2020, 2025, 2030, 2035, 2040, 2045, 2050]:
+                          tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls, base_year, final_year):
+
+    if year not in [base_year, 2025, 2030, 2035, 2040, 2045, final_year]:
         # only summarize for years which are multiples of 5
         return

@@ -1438,7 +1440,7 @@ def write(s):

 @orca.step()
-def hazards_eq_summary(run_setup, run_number, year, households, jobs, parcels, buildings):
+def hazards_eq_summary(run_setup, run_number, year, households, jobs, parcels, buildings, final_year):

     if run_setup['run_eq']:
         if year == 2035:
@@ -1559,7 +1561,7 @@ def write(s):

     # print out buildings in 2030, 2035, and 2050 so Horizon team can compare
     # building inventory by TAZ
-    if year in [2030, 2035, 2050] and eq:
+    if year in [2030, 2035, final_year] and eq:
         buildings = buildings.to_frame()

         buildings_taz = misc.reindex(parcels.zone_id, buildings.parcel_id)

From af3dd94c3642b2eb939b0134af8d0035a854dc85 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Mon, 24 Jul 2023 12:41:10 -0700
Subject: [PATCH 25/49] update inputs doc

---
 docs/input.md | 43 +++++++++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/docs/input.md b/docs/input.md
index 674b11862..9980dedbd 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -18,25 +18,22 @@ AccessibilityMarkets_YYY.csv| A travel model output file that incorporates trav
 mandatoryAccessibilities_YYY.csv| A travel model output file that
incorporates travel model run logsums into the forecast, by year.
 nonMandatoryAccessibilities_YYY.csv| A travel model output file that incorporates travel model run logsums into the forecast, by year.
  
-## basis_inputs (under construction)
+## basis_inputs
 ### crosswalks
 **name**|**use**
 -----|-----|
-parcel_to_maz22.csv| A lookup table from parcels to Travel Model Two MAZs.
-parcel_to_taz1454sub.csv| A lookup table from parcels to Travel Model One TAZs.
-parcels_geography.csv| A lookup table from parcels to jurisdiction, growth geographies, UGB areas, greenfield areas, and a concatenation of these used to join these geographies to zoning_mods.csv, to apply zoning rules within them.
+growth_geographies.csv | A mapping of parcels to all growth geographies used in the Plan.
+travel_model_zones.csv| A mapping of parcels to travel model geographies.
 census_id_to_name.csv| Maps the census id from parcels_geography to a name so it can be used.
-maz_geography| A lookup between MAZ, TAZ2, and county.
-maz22_taz1454| A lookup between MAZ and TAZ1.
-superdistricts_geography.csv| A map of superdistrict numbers, names, and their subregion.
-taz_geography.csv| A lookup between TAZ1, superdistrict, and county.
 ### edits
 **name**|**use**
 -----|-----|
 data_edits.yaml| Settings for editing the input data in the model code, e.g. clipping values.
-manual_edits.csv| Overrides the current h5 data using the table name, attribute name, and new value, so we don't have to generate a new one each time.
-household_building_id_overrides.csv| Moves households to match new city household totals during the data preprocessing.
-tpp_id_2016.csv| Updates tpp_ids after changes were made to the ids.
+### equity
+**name**|**use**
+-----|-----|
+coc_tracts.csv | Communities of Concern census tracts, used to generate model metrics.
+urban_displacement_tracts.csv | Urban Displacement Project census tracts, used to generate model metrics.
 ### existing_policy
 **name**|**use**
 -----|-----|
@@ -45,22 +42,27 @@ inclusionary.yaml| Base year inclusionary zoning policies in place in jurisdicti
 ### hazards
 **name**|**use**
 -----|-----|
-slr_progression.csv| The sea level rise level, for each forecast year.
-slr_inundation.csv| The sea level rise level at which each inundation parcel becomes inundated, for each forecast year. Rows marked with "100" are parcels where sea level rise has been mitigated, either through planned projects or a plan strategy.
+slr_progression.csv | The sea level rise level in each forecast year.
+slr_inundation.csv | The sea level rise level at which each inundation parcel becomes inundated.
+slr_committed_mitigation.csv | The sea level rise level at which each inundation parcel becomes inundated, with a column to indicate whether a committed project has been applied to mitigate the sea level rise and prevent inundation.
 ### parcels_buildings-agents
 **name**|**use**
 -----|-----|
-bayarea_v3.h5| Base year database of households, jobs, buildings, and parcels. The data is pre-processed in pre-processing.py.
-costar.csv| Commercial data from CoStar, including non-residential price to inform the price model.
+buildings.csv | A list of buildings in the region and their attributes, collected from parcel assessor's data (or third-party processed parcel assessor's data), supplemented with additional datasets such as CoStar commercial real estate data and Craigslist rental data, and in certain cases imputed to maintain a buildings dataset for the entire region.
This data is sometimes pseudo-building data in that the assessor's data often aggregates building data on a parcel.
+costar.csv | Commercial data from CoStar, including non-residential price to inform the non-residential building price model.
+data_store.h5| This is simply the households, jobs, buildings, residential units, and parcels datasets packaged in HDF5 format for faster model runtime.
+craisglist.csv| Craigslist data to inform the residential rental price model and model tenure.
 development_projects.csv| The list of projects that have happened since the base data, or buildings in the development pipeline. This file tends to have more attributes than we use in the model.
-deed_restricted_zone_totals.csv| An approximate number of deed restricted units per TAZ to assign randomly within the TAZ.
-baseyear_taz_controls.csv| Base year control totals by TAZ, to use for checking and refining inputs. The file includes number of units, vacancy rates, and employment by sector (TODO: add households).
-sfbay_craisglist.csv| Craigslist data to inform rental unit information and model tenure.
+households.csv | A list of households in the region and select attributes such as household income and tenure, used to forecast their locations in the region. The households dataset is generated with a population synthesizer using census household microdata.
+institutions.csv | A list of institutions in the region, such as hospitals and schools, which don't follow typical market-based development and location behaviors and are modeled separately.
+jobs.csv | A list of jobs in the region by industry and occupation, used to forecast their locations in the region. The jobs dataset is built with microdata on job locations.
+nodev_sites.csv | A list of parcels which cannot be redeveloped due to site-specific land use constraints.
+parcels.csv | A list of parcels in the region. The foundational unit that buildings, households, and jobs act on.
+residential_units.csv | A list of residential units in the region, derived from the buildings table without additional unit-level information, but that helps predict prices and model tenure at the unit level.
 ### zoning
 **name**|**use**
 -----|-----|
-zoning_parcels.csv| A lookup table from parcels to zoning_id, zoning area information, and a "nodev" flag (currently all set to 0).
-zoning_lookup.csv| The existing zoning for each jurisdiction, assigned to parcels with the "id" field. Fields include the city name, city id, and the name of the zoning. The active attributes are max_dua, max_far, and max_height, all of which must be respected by each development.
+boc.csv| A parcel table with each parcel's zoning designation, allowed building types, maximum dwelling units per acre, maximum floor area ratio, and maximum height.
  
 ## plan_strategies
 **name**|**use**
 -----|-----|
@@ -75,6 +77,7 @@ renter_protections_relocation_rates_overwrites| The rows in this file overwrite
 telecommute_sqft_per_job_adjusters| These are multipliers which adjust the sqft per job setting by superdistrict by year to represent changes from a telework strategy. (TODO: Disentangle the k-factors and the policy application within this file and sqft_per_job_adjusters.csv. In the meantime, use both files as is done in the PBA50 No Project).
 vmt_fee_zonecats.csv| This file pairs with the VMT Fee and SB-743 strategies. It provides VMT levels by TAZ1, which map to the corresponding price adjustments in the strategies.
 zoning_mods.csv| A file which allows you to upzone or downzone. If you enter a value in "dua_up" or "far_up", the model will apply that as the new zoning or maintain the existing zoning if it is higher. If you enter a value in "dua_down" or "far_down", the model will apply that as the zoning or maintain the existing zoning if it is lower. UGBs are also controlled using this file, using zoning changes to enforce them. This file is mapped to parcels using the field "zoningmodcat", which is the concatenated field of growth designations in parcels_geography.csv. (See the sketch after this patch.)
+slr_strategy_mitigation.csv | The sea level rise level at which each inundation parcel becomes inundated, with a column to indicate whether a strategy has been applied to mitigate the sea level rise and prevent inundation.
  
 ## regional_controls
 **name**|**use**
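A sketch of the upzoning/downzoning rule described for zoning_mods.csv above — dua_up acts as a floor on allowed density and dua_down as a ceiling — using illustrative column values, not the exact model code:

```python
import numpy as np
import pandas as pd

# illustrative parcels: existing max_dua plus strategy inputs from zoning_mods.csv
parcels = pd.DataFrame({
    "max_dua":  [10.0, 40.0, 25.0],
    "dua_up":   [30.0, 30.0, np.nan],    # upzone targets (NaN = no change)
    "dua_down": [np.nan, np.nan, 15.0],  # downzone targets (NaN = no change)
})

# dua_up: apply the new zoning, or keep existing zoning if it is higher
upzoned = parcels.max_dua.where(parcels.dua_up.isnull(),
                                np.fmax(parcels.max_dua, parcels.dua_up))

# dua_down: apply the new zoning, or keep existing zoning if it is lower
effective_dua = upzoned.where(parcels.dua_down.isnull(),
                              np.fmin(upzoned, parcels.dua_down))

print(effective_dua.tolist())  # [30.0, 40.0, 15.0]
```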
From 22809fb7616d680e6a2cc00f1aa3b20994afb081 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Mon, 24 Jul 2023 13:07:19 -0700
Subject: [PATCH 26/49] update the user guide doc

---
 docs/user_guide.md | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/docs/user_guide.md b/docs/user_guide.md
index cc2936f2e..d4c6ebd95 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -1,29 +1,33 @@
 # User Guide
-This User Guide applies to the UrbanSim implementation for the Bay Area. Documentation for the UrbanSim framework is available here.
+This User Guide applies to the UrbanSim implementation for the Bay Area.

 ## Installation
 Bay Area UrbanSim is written in Python and runs in a command line environment. It's compatible with Mac, Windows, and Linux, and with Python 2.7 and 3.5+. Python 3 is recommended.
-
 1. Install the Anaconda Python distribution (not strictly required, but makes things easier and more reliable)
 2. Clone this repository
-3. Download base data from this Box folder and move the files to `bayarea_urbansim/data/` (ask an MTC contact for access)
-4. Clone the MTC urban_data_internal repository to the same location as this repository (ask an MTC contact for access)
-5. Create a Python environment with the current dependencies: `conda env create -f baus-env-2020.yml`
-6. Activate the environment: `conda activate baus-env-2020`
-7. Pre-process the base data: `python baus.py --mode preprocessing` (only needed once)
-8. Run the model: `python baus.py` (typical AWS linux run uses `nohup python baus.py -s 25 --disable-slack --random-seed &` which adds no hanging up / specifies scenario 25 / disables slack output / turns OFF random seed / puts in background)
+3. Create a Python environment with the current dependencies: `conda env create -f baus-env-2020.yml`
+4. Activate the environment: `conda activate baus-env-2020`
+
+## Model Run

-More info about the command line arguments: `python baus.py --help`
+1. Download the model inputs folder and the run_setup.yaml file (ask an MTC contact for access) and create an outputs directory
+2. Specify I/O folder locations, model features to enable, and policy configurations in `run_setup.yaml` (see the sketch after this list)
+3. Run `python baus.py` from the main model directory (more info about the command line arguments: `python baus.py --help`)
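A minimal sketch of how `run_setup.yaml` drives a run — BAUS loads the file once and exposes it through an orca injectable that steps pick up by name. The keys shown are illustrative examples, not the full schema, which comes with the downloaded file:

```python
import orca
import yaml

# a tiny, illustrative run_setup.yaml
example_yaml = """
run_name: test_run
inputs_dir: ./inputs
outputs_dir: ./outputs
run_summaries: true
run_slr: false
"""

run_setup = yaml.safe_load(example_yaml)

# registered once; steps then declare run_setup (or a derived
# injectable such as inputs_dir) in their signatures to use it
orca.add_injectable("run_setup", run_setup)
orca.add_injectable("inputs_dir", run_setup["inputs_dir"])

print(orca.get_injectable("run_setup")["run_name"])  # test_run
```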
 
-Optional visualization tool and Slack messenger:
+## Optional visualization tool and Slack messenger
 * Configure Amazon Web Services (AWS) to get s3 permission (you will need an appropriately configured AWS credentials file from your MTC contact)
 * Install AWS SDK for Python -- boto3 using `pip install boto3`
 * Install Slacker to use Slack API using `pip install slacker` (you will need an appropriate slack token to access the slack bot from your MTC contact)
 * Set environment variable `URBANSIM_SLACK = TRUE`
 
-## File Structure
+## Adding documentation to gh-pages
 
-TBD
+1. Install the required docs packages with pip: `mkdocs`, `mike`, `mkdocs-autorefs`, `mkdocs-material`, `mkdocstrings[python]*`
+2. While in your development branch, edit the mkdocs.yml file located in the repo's root dir and the markdown files located in the `docs` folder
+3. From the model's root dir call `mike deploy [branch_name] [alias=latest]`, or `mike deploy [branch_name]` if another branch has the alias "latest". This creates a Github commit in branch "gh-pages". If this is the initial documentation publication of the branch, it will create a new folder in the root dir of the "gh-pages" branch using the branch name and also update the "latest" folder; otherwise, it will push the updates to both the branch's folder (e.g. "baus_v2") and the "latest" folder in branch "gh-pages".
+4. Switch to branch "gh-pages" and push the commit to origin.
+5. To make a branch's documentation the `main` branch's documentation, merge the branch into main and deploy the main branch.
+6. After merging a branch into the main branch and deleting that branch, delete the associated doc using `mike delete [branch_name]`
\ No newline at end of file

From ce49c96a8b5efb75454228de8e6c0a703bfb300 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Mon, 24 Jul 2023 14:02:20 -0700
Subject: [PATCH 27/49] Update README.md

---
 README.md | 29 +++++------------------------
 1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index cdd0fb025..8cfa71399 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
-DRAFT Bay Area UrbanSim (BAUS) Implementation
+Bay Area UrbanSim (BAUS) Implementation
 =======
-[![Build Status](https://travis-ci.org/UDST/bayarea_urbansim.svg?branch=master)](https://travis-ci.org/UDST/bayarea_urbansim)
+This is the UrbanSim implementation for the Bay Area. Documentation for the UrbanSim framework is available [here](https://udst.github.io/urbansim/).
 
-This is the DRAFT UrbanSim implementation for the Bay Area. Policy documentation for the Bay Area model is available [here](http://data.mtc.ca.gov/bayarea_urbansim/) and documentation for the UrbanSim framework is available [here](https://udst.github.io/urbansim/).
+All other information for Bay Area UrbanSim is at: http://bayareametro.github.io/bayarea_urbansim/main/
 
 ### Branches
 * `main` contains the most recent release
 * feature branches contain descriptive names
 * application branches contain descriptive names
 
-### Installation
-
-Bay Area UrbanSim is written in Python and runs in a command line environment. It's compatible with Mac, Windows, and Linux, and with Python 2.7 and 3.5+. Python 3 is recommended.
-
-1. 
Install the [Anaconda Python](https://www.anaconda.com/products/individual#Downloads) distribution (not strictly required, but makes things easier and more reliable) -2. Clone this repository -3. Download base data from this [Box folder](under construction) and move the files to `` (ask an MTC contact for access) -4. Create a Python environment with the current dependencies: `conda env create -f baus-env-2020.yml` -5. Activate the environment: `conda activate baus-env-2020` -6. Use `run_setup.yaml` to set `inputs` and `outputs` folder paths and configure a model run -7. Run the model: `python baus.py` - -More info about the command line arguments: `python baus.py --help` - -Optional visualization tool and Slack messenger: -* Configure Amazon Web Services (AWS) to get s3 permission (you will need an appropriately configured AWS credentials file from your MTC contact) -* Install AWS SDK for Python -- boto3 using `pip install boto3` -* Install Slacker to use Slack API using `pip install slacker` (you will need an appropriate slack token to access the slack bot from your MTC contact) -* Set environment variable `URBANSIM_SLACK = TRUE` - -For all other information on the code and model application: http://bayareametro.github.io/bayarea_urbansim/main/ \ No newline at end of file +### Integration Testing +[![Build Status](https://travis-ci.org/UDST/bayarea_urbansim.svg?branch=master)](https://travis-ci.org/UDST/bayarea_urbansim) \ No newline at end of file From c33cc23423e1f728744e07cfd6df4fc5e724b596 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 2 Aug 2023 09:28:33 -0700 Subject: [PATCH 28/49] add v0d datasets --- baus/datasources.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/baus/datasources.py b/baus/datasources.py index 41820b7dd..511e53610 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -293,7 +293,7 @@ def new_tpp_id(): @orca.table(cache=True) def travel_model_zones(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0c.csv")) + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0d.csv")) @orca.table(cache=True) @@ -455,7 +455,7 @@ def accessibilities_segmentation(year, run_setup): # shared between demolish and build tables below def get_dev_projects_table(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/dev_pipeline_v0c.csv")) + "basis_inputs/parcels_buildings_agents/dev_pipeline_v0d.csv")) df = df.set_index("parcel_id") return df @@ -486,7 +486,7 @@ def development_projects(): @orca.table(cache=True) def dev_pipeline_strategy_projects(run_setup, development_projects): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0c.csv")) + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0d.csv")) df = df.set_index("parcel_id") if run_setup["dev_pipeline_strategy_projects"]: From 7fe979792ab99eb49462f7c928e5ead1d53a50cc Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 2 Aug 2023 09:42:25 -0700 Subject: [PATCH 29/49] remove base year models since it's not a simulation year --- baus.py | 94 +++------------------------------------------------------ 1 file changed, 4 insertions(+), 90 deletions(-) diff --git a/baus.py b/baus.py index 0bae63024..4ebe839fe 100644 --- a/baus.py +++ b/baus.py @@ -67,9 +67,6 @@ 
parser.add_argument('-i', action='store_true', dest='interactive', help='enter interactive mode after imports') -parser.add_argument('-k', action='store_true', dest='skip_base_year', - help='skip base year - used for debugging') - parser.add_argument('-y', action='store', dest='out_year', type=int, help='The year to which to run the simulation.') @@ -94,8 +91,6 @@ if options.out_year: OUT_YEAR = options.out_year -SKIP_BASE_YEAR = options.skip_base_year - if options.mode: MODE = options.mode @@ -347,83 +342,6 @@ def get_summary_models(): return summary_models -def get_baseyear_models(): - - baseyear_models = [ - - "slr_inundate", - "slr_remove_dev", - "eq_code_buildings", - "earthquake_demolish", - - "neighborhood_vars", # local accessibility vars - "regional_vars", # regional accessibility vars - - "rsh_simulate", # residential sales hedonic for units - "rrh_simulate", # residential rental hedonic for units - "nrh_simulate", - - # (based on higher of predicted price or rent) - "assign_tenure_to_new_units", - - # uses conditional probabilities - "household_relocation", - "households_transition", - # update building/unit/hh correspondence - "reconcile_unplaced_households", - "jobs_transition", - - # we first put Q1 households only into deed-restricted units, - # then any additional unplaced Q1 households, Q2, Q3, and Q4 - # households are placed in either deed-restricted units or - # market-rate units - "hlcm_owner_lowincome_simulate", - "hlcm_renter_lowincome_simulate", - - # allocate owners to vacant owner-occupied units - "hlcm_owner_simulate", - # allocate renters to vacant rental units - "hlcm_renter_simulate", - - # we have to run the hlcm above before this one - we first want - # to try and put unplaced households into their appropraite - # tenured units and then when that fails, force them to place - # using the code below. 
-
-        # force placement of any unplaced households, in terms of
-        # rent/own, is a noop except in the final simulation year
-        # 09 11 2020 ET: enabled for all simulation years
-        "hlcm_owner_simulate_no_unplaced",
-        "hlcm_owner_lowincome_simulate_no_unplaced",
-        # this one crashes right now because there are no unplaced, so
-        # need to fix the crash in urbansim
-        # 09 11 2020 ET: appears to be working
-        "hlcm_renter_simulate_no_unplaced",
-        "hlcm_renter_lowincome_simulate_no_unplaced",
-
-        # update building/unit/hh correspondence
-        "reconcile_placed_households",
-
-        "elcm_simulate",
-
-        "price_vars"
-        # "scheduled_development_events"
-
-    ]
-
-    run_setup = orca.get_injectable("run_setup")
-
-    # sea level rise and sea level rise mitigation
-    if not run_setup["run_slr"]:
-        baseyear_models.remove("slr_inundate")
-        baseyear_models.remove("slr_remove_dev")
-
-    # earthquake and earthquake mitigation
-    if not run_setup["run_eq"]:
-        baseyear_models.remove("eq_code_buildings")
-        baseyear_models.remove("earthquake_demolish")
-
-    return baseyear_models

 def get_baseyear_summary_models():
@@ -432,9 +350,6 @@ def get_baseyear_summary_models():
         "simulation_validation",

         "diagnostic_output",
-        "hazards_slr_summary",
-        "hazards_eq_summary",
-
         "parcel_summary",
         "building_summary",
@@ -476,19 +391,18 @@ def run_models(MODE):
     run_setup = orca.get_injectable("run_setup")

     # see above for docs on this
-    if not SKIP_BASE_YEAR:
-        baseyear_models = get_baseyear_models()
-        if run_setup["run_summaries"]:
-            baseyear_models.extend(get_baseyear_summary_models())
-        orca.run(baseyear_models, iter_vars=[IN_YEAR])
+    if run_setup["run_summaries"]:
+        orca.run(get_baseyear_summary_models(), iter_vars=[IN_YEAR])

     # start the simulation in the next round - only the models above run
     # for the IN_YEAR
     years_to_run = range(IN_YEAR+EVERY_NTH_YEAR, OUT_YEAR+1, EVERY_NTH_YEAR)
+
     models = get_simulation_models()
     if run_setup["run_summaries"]:
         models.extend(get_summary_models())
+
     orca.run(models, iter_vars=years_to_run)

From a8e0faba71bf26cd1054c088b43487cd1d9dd3ae Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Wed, 2 Aug 2023 09:54:18 -0700
Subject: [PATCH 30/49] maintain hazards models summaries

---
 baus.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/baus.py b/baus.py
index 4ebe839fe..5a89bbb27 100644
--- a/baus.py
+++ b/baus.py
@@ -350,6 +350,9 @@ def get_baseyear_summary_models():
         "simulation_validation",

         "diagnostic_output",
+        "hazards_slr_summary",
+        "hazards_eq_summary",
+
         "parcel_summary",
         "building_summary",

From 5ea2084b551542feff93b6162e021a695cc8d952 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Wed, 2 Aug 2023 10:05:30 -0700
Subject: [PATCH 31/49] don't write slr summary if model doesn't run

---
 baus/summaries/hazards_summaries.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/baus/summaries/hazards_summaries.py b/baus/summaries/hazards_summaries.py
index 409db4970..1d1f988bb 100644
--- a/baus/summaries/hazards_summaries.py
+++ b/baus/summaries/hazards_summaries.py
@@ -12,7 +12,12 @@ def hazards_slr_summary(run_setup, year):
     if not run_setup['run_slr']:
         return

-    if len(orca.get_table("slr_demolish")) < 1:
+    try:
+        slr_demolish = orca.get_table("slr_demolish")
+    except Exception:
+        return
+
+    if len(slr_demolish) < 1:
         return

     slr_summary = pd.DataFrame(index=[0])

From 61c8c1c02221166d6d0ca6ef578ab71d2c091ad7 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Mon, 14 Aug 2023 10:37:38 -0700
Subject: [PATCH 32/49] add new slr files

---
 baus/datasources.py | 14 +++++++++-----
 baus/slr.py         | 10 +++++++---
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index 511e53610..68a49f55e 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -602,13 +602,17 @@ def slr_progression():
     return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_progression.csv"))


+@orca.table(cache=True)
+def slr_committed_mitigation():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_committed_mitigation.csv"),
+                       index_col='parcel_id')
+
+
 # SLR inundation levels for parcels
-# if slr is activated, there is either a committed projects mitigation applied
-# or a committed projects + policy projects mitigation applied
 @orca.table(cache=True)
-def slr_parcel_inundation():
-    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_parcel_inundation.csv"),
-                       dtype={'parcel_id': np.int64}, index_col='parcel_id')
+def slr_strategy_mitigation():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_strategy_mitigation.csv"),
+                       index_col='parcel_id')


 # census tracts for parcels, to assign earthquake probabilities
diff --git a/baus/slr.py b/baus/slr.py
index 043c6d4d8..25a813014 100644
--- a/baus/slr.py
+++ b/baus/slr.py
@@ -10,7 +10,7 @@


 @orca.step()
-def slr_inundate(slr_progression, slr_parcel_inundation, year, parcels):
+def slr_inundate(slr_progression, parcels, run_setup):

     # inundated parcels are all parcels at or below the SLR progression level in that year
     slr_progression = slr_progression.to_frame()
@@ -19,8 +19,12 @@
     print("Inundation in model year is %d inches" % inundation_yr)

     # tag parcels that are inundated in the current year
-    # slr mitigation is applied by modifying the set of inundated parcels in the list
-    slr_parcel_inundation = slr_parcel_inundation.to_frame()
+    # slr mitigation is applied by modifying the set of inundated parcels
+    if run_setup["run_strategy_mitigation"]:
+        slr_parcel_inundation = orca.get_table("slr_strategy_mitigation").to_frame()
+    else:
+        slr_parcel_inundation = orca.get_table("slr_committed_mitigation").to_frame()
+    slr_parcel_inundation = slr_parcel_inundation[slr_parcel_inundation.mitigation != True]

     orca.add_injectable("slr_mitigation", 'applied')

     destroy_parcels = slr_parcel_inundation.query('inundation<=@inundation_yr').astype('bool')
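How the mitigation tables in the patch above interact with the progression series, as a sketch with made-up parcel data (column names follow the patch; values are illustrative):

```python
import pandas as pd

# illustrative mitigation table: the SLR level (inches) at which each
# parcel becomes inundated, plus a flag for parcels that are mitigated
mitigation = pd.DataFrame(
    {"inundation": [12, 24, 36], "mitigation": [False, True, False]},
    index=pd.Index([1, 2, 3], name="parcel_id"))

# mitigated parcels are dropped before the inundation test, so they
# never show up as destroyed no matter how high the water gets
at_risk = mitigation[mitigation.mitigation != True]

inundation_yr = 24  # progression level for the current model year
destroy_parcels = at_risk.query("inundation <= @inundation_yr")
print(destroy_parcels.index.tolist())  # [1] - parcel 2 was mitigated
```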
"basis_inputs/zoning/boc_v0e.csv") @orca.table(cache=True) @@ -293,7 +293,7 @@ def new_tpp_id(): @orca.table(cache=True) def travel_model_zones(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0d.csv")) + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0e.csv")) @orca.table(cache=True) @@ -394,7 +394,7 @@ def parcels_zoning_calculations(parcels): @orca.table(cache=True) def growth_geographies(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0c.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0d.csv") @orca.table(cache=False) @@ -455,7 +455,7 @@ def accessibilities_segmentation(year, run_setup): # shared between demolish and build tables below def get_dev_projects_table(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/dev_pipeline_v0d.csv")) + "basis_inputs/parcels_buildings_agents/dev_pipeline_v0e.csv")) df = df.set_index("parcel_id") return df @@ -486,7 +486,7 @@ def development_projects(): @orca.table(cache=True) def dev_pipeline_strategy_projects(run_setup, development_projects): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0d.csv")) + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0e.csv")) df = df.set_index("parcel_id") if run_setup["dev_pipeline_strategy_projects"]: @@ -672,7 +672,7 @@ def accessory_units(): @orca.table(cache=True) def nodev_sites(): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0c.csv"), index_col="parcel_id") + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0e.csv"), index_col="parcel_id") return df From 585a024a9255c94c09ee8998e633c891339ecd28 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Mon, 14 Aug 2023 13:46:56 -0700 Subject: [PATCH 34/49] remove old tm logsum file from code --- baus/datasources.py | 6 ------ configs/developer/developer_settings.yaml | 10 +--------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/baus/datasources.py b/baus/datasources.py index 2c69a7bda..8cc15b86b 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -632,12 +632,6 @@ def tracts_earthquake(): os.path.join(orca.get_injectable("inputs_dir"), "tract_damage_earthquake.csv")) -# override urbansim_defaults which looks for this in data/ -@orca.table(cache=True) -def logsums(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "accessibility/pandana/logsums.csv"), index_col="taz") - - @orca.table(cache=True) def employment_relocation_rates(): df = pd.read_csv(os.path.join(misc.configs_dir(), "transition_relocation/employment_relocation_rates.csv")) diff --git a/configs/developer/developer_settings.yaml b/configs/developer/developer_settings.yaml index c365c433f..6b0cbde65 100644 --- a/configs/developer/developer_settings.yaml +++ b/configs/developer/developer_settings.yaml @@ -206,12 +206,4 @@ naics_to_empsix: # convert square meters to square feet -parcel_size_factor: 10.764 - - -# these are the tables the get auto-merged to buildings/parcels in the hedonic and lcms -aggregation_tables: - - nodes - - tmnodes - - logsums - - buildings \ No newline at end 
of file +parcel_size_factor: 10.764 \ No newline at end of file From 85aae321635b70b7428dcd07e70d00da1e9d823f Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Mon, 14 Aug 2023 13:58:15 -0700 Subject: [PATCH 35/49] continue outdated logsums file removal --- baus/ual.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/baus/ual.py b/baus/ual.py index fe4503e7a..b7b562e56 100644 --- a/baus/ual.py +++ b/baus/ual.py @@ -103,7 +103,7 @@ def load_rental_listings(): - 'node_id' (int, may be missing, corresponds to index of 'nodes') - 'tmnode_id' (int, may be missing, corresponds to index of 'tmnodes') - 'zone_id' (int, may be missing, corresponds to index of 'zones') - - adds broadcasts linking 'craigslist' to 'nodes', 'tmnodes', 'logsums' + - adds broadcasts linking 'craigslist' to 'nodes', 'tmnodes' """ @orca.table('craigslist', cache=True) def craigslist(): @@ -126,7 +126,6 @@ def zone_id(craigslist, parcels): orca.broadcast('tmnodes', 'craigslist', cast_index=True, onto_on='tmnode_id') orca.broadcast('zones', 'craigslist', cast_index=True, onto_on='zone_id') - orca.broadcast('logsums', 'craigslist', cast_index=True, onto_on='zone_id') return From 4597172102394fabd5af6bbf2fc2fd421b4d8c10 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 15 Aug 2023 10:57:30 -0700 Subject: [PATCH 36/49] add updated bart stations for pandana --- baus/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus/models.py b/baus/models.py index 2209162da..3f1eff2d2 100644 --- a/baus/models.py +++ b/baus/models.py @@ -944,7 +944,7 @@ def local_pois(accessibility_settings): cols = {} - locations = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'accessibility/pandana/bart_stations.csv')) + locations = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'accessibility/pandana/bart_stations_2020.csv')) n.set_pois("tmp", locations.lng, locations.lat) cols["bartdist"] = n.nearest_pois(3000, "tmp", num_pois=1)[1] From 1d265c252725dcdc38d6bf8504dd8e11c164c09b Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 5 Sep 2023 09:13:09 -0700 Subject: [PATCH 37/49] update inputs --- baus.py | 2 +- baus/datasources.py | 25 +++++++------------------ baus/variables.py | 25 +------------------------ 3 files changed, 9 insertions(+), 43 deletions(-) diff --git a/baus.py b/baus.py index 5b243a24f..9eeb62a81 100644 --- a/baus.py +++ b/baus.py @@ -530,7 +530,7 @@ def run_models(MODE): df2 = pd.read_csv((orca.get_injectable("outputs_dir")+"/run%d_superdistrict_summaries_2050.csv") % run_name) df2 = df2.set_index(df2.columns[0]).sort_index() - supnames_df = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/travel_model_zones_v0c.csv")) + supnames_df = pd.read_csv((orca.get_injectable("inputs_dir") + "/basis_inputs/crosswalks/travel_model_zones.csv")) supnames = supnames_df.sort_values(['superdistrict'])['superdistrict_name'].unique() summary = compare_summary(df1, df2, supnames) diff --git a/baus/datasources.py b/baus/datasources.py index 8cc15b86b..2576a6535 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -77,12 +77,6 @@ def account_strategies(): def development_caps(): with open(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/existing_policy/development_caps.yaml")) as f: return yaml.load(f) - - -@orca.injectable('data_edits', cache=True) -def data_edits(): - with open(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/data_edits.yaml")) as f: - return yaml.load(f) 
@orca.injectable('development_caps_asserted', cache=True) @@ -270,7 +264,7 @@ def costar(parcels): @orca.table(cache=True) def zoning_existing(zoning_lookup): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/boc_v0e.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/existing_policy/boc.csv") @orca.table(cache=True) @@ -285,15 +279,9 @@ def proportional_gov_ed_jobs_forecast(): index_col="Taz") -@orca.table(cache=True) -def new_tpp_id(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/tpp_id_2016.csv"), - index_col="parcel_id") - - @orca.table(cache=True) def travel_model_zones(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones_v0e.csv")) + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones.csv")) @orca.table(cache=True) @@ -394,7 +382,7 @@ def parcels_zoning_calculations(parcels): @orca.table(cache=True) def growth_geographies(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies_v0d.csv") + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies.csv") @orca.table(cache=False) @@ -455,7 +443,7 @@ def accessibilities_segmentation(year, run_setup): # shared between demolish and build tables below def get_dev_projects_table(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/dev_pipeline_v0e.csv")) + "basis_inputs/parcels_buildings_agents/dev_pipeline.csv")) df = df.set_index("parcel_id") return df @@ -483,10 +471,11 @@ def development_projects(): return df + @orca.table(cache=True) def dev_pipeline_strategy_projects(run_setup, development_projects): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects_v0e.csv")) + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects.csv")) df = df.set_index("parcel_id") if run_setup["dev_pipeline_strategy_projects"]: @@ -666,7 +655,7 @@ def accessory_units(): @orca.table(cache=True) def nodev_sites(): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites_v0e.csv"), index_col="parcel_id") + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites.csv"), index_col="parcel_id") return df diff --git a/baus/variables.py b/baus/variables.py index 8ebcddbe9..b8f9840f3 100644 --- a/baus/variables.py +++ b/baus/variables.py @@ -505,31 +505,8 @@ def juris(parcels, parcels_geography): @orca.column('parcels', cache=True) -def ave_sqft_per_unit(parcels, zones, data_edits): +def ave_sqft_per_unit(parcels, zones): s = misc.reindex(zones.ave_unit_sqft, parcels.zone_id) - - clip = data_edits.get("ave_sqft_per_unit_clip", None) - if clip is not None: - s = s.clip(lower=clip['lower'], upper=clip['upper']) - - ''' - This is a fun feature that lets you set max dua for new contruction based - on the dua (as an indicator of density and what part of the city we are). 
- Example use in the YAML: - - clip_sqft_per_unit_based_on_dua: - - threshold: 50 - max: 1000 - - threshold: 100 - max: 900 - - threshold: 150 - max: 800 - ''' - cfg = data_edits.get("clip_sqft_per_unit_based_on_dua", None) - if cfg is not None: - for clip in cfg: - s[parcels.max_dua >= clip["threshold"]] = clip["max"] - return s From 4fd66054dcd93431126a48f27ca964dd87a6e9f7 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 5 Sep 2023 12:38:21 -0700 Subject: [PATCH 38/49] new basis inputs documentation --- docs/input.md | 51 ++++++++++++++++++++------------------------------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/docs/input.md b/docs/input.md index 515de1283..32715598c 100644 --- a/docs/input.md +++ b/docs/input.md @@ -11,8 +11,7 @@ tmnet.h5| Travel model network information for calculating accessibility within osm_bayarea4326.h5| Street network information for calculating accessibility within the model using Pandana landmarks.csv| Locations of a few major landmarks in the region for accessibility calculations. regional_poi_distances.csv| The pre-computed distances from each travel model node to each landmark. -bart_stations.csv| A list of BART stations and their locations so that distance to BART can calculated. -logsums.csv| A set of base year logsums from the travel model. +bart_stations_2020.csv| A list of BART stations and their locations so that distance to BART can calculated. #### travel_model/ **name**|**description** -----|----- @@ -20,54 +19,44 @@ AccessibilityMarkets_[year].csv| A travel model output file that incorportates t mandatoryAccessibilities_[year].csv| A travel model output file that incorportates travel model run logsums into the forecast, by year. nonMandatoryAccessibilities_[year].csv| A travel model output file that incorportates travel model run logsums into the forecast, by year. -### basis_inputs (in progress)/ +### basis_inputs/ #### crosswalks/ **name**|**desription** -----|----- -parcel_to_maz22.csv| A lookup table from parcels to Travel Model Two MAZs. -parcel_to_taz1454sub.csv| A lookup table from parcels to Travel Model One TAZs. -parcels_geography.csv| A lookup table from parcels to jurisdiction, growth geographies, UGB areas, greenfield areas, and a concatenation of these used to join these geographies zoning_mods.csv, to apply zoning rules within them. -census_id_to_name.csv| Maps census id from parcels_geography to name so it can be used. -maz_geography| A lookup between MAZ, TAZ2, and county. -maz22_taz1454| A lookup between MAZ and TAZ1. -superdistricts_geography.csv| A map of superdistrict numbers, names, and their subregion. -taz_geography.csv| A lookup between TAZ1, supedisctrict, and county. +growth_geographies.csv| A lookup table from parcels to growth geographies, zoning mod categories (see: plan_strategies/), and UGB zoning mod categories. +travel_model_geographies.csv| A lookup between TAZ, supedisctrict, and subgregion. -#### edits/ +#### equity/ **name**|**description** -----|----- -data_edits.yaml| Settings for editing the input data in the model code, e.g. clipping values. -manual_edits.csv| Overrides the current h5 data using the table name, attribute name, and new value, so we don't have to generate a new one each time. -household_building_id_overrides.csv| Moves households to match new city household totals during the data preprocessing. -tpp_id_2016.csv| Updates tpp_ids after changes were made to the ids. 
#### existing_policy/
**name**|**description**
-----|-----
development_caps.yaml| Base year job cap policies in place in jurisdictions (TODO: remove the asserted development caps k-factors entangled here.)
inclusionary.yaml| Base year inclusionary zoning policies in place in jurisdictions (TODO: have all model runs inherit these, even if an inclusionary strategy is applied).
boc.csv | Base year build out capacity (zoning) information for each parcel, including max_dua, max_far, and max_height, all of which must be respected by each development.

#### hazards/
**name**|**description**
-----|-----
slr_progression.csv| The sea level rise level for each forecast year.
slr_inundation.csv| The sea level rise level at which each inundation parcel becomes inundated.

#### parcels_buildings-agents/
**name**|**description**
-----|-----
parcels.csv| A list of parcels in the region, their jurisdiction and county.
buildings.csv| A list of buildings in the region, which link to their parcel, and select building attributes.
residential_units.csv| This reflects the same information as the buildings table, but creates a row for each unit in a building for the model to use.
jobs.csv| A list of all jobs in the region and their industry category. Each job has an associated building ID.
households.csv| A list of all households in the region and their income category. Each household has an associated residential unit ID.
core_datasets.h5| This file simply packages the above datasets for use during model runtime (see the sketch after this table).
development_projects.csv| The list of projects that have happened since the base data, or buildings in the development pipeline.
institutions.csv| These are job locations in the region that operate outside of the commercial real estate market, and are therefore set off-limits for development and for jobs to relocate from.
nodev_sites.csv| This is a list of all sites set off-limits for development with their "nodev" category, including uses such as open space and historic buildings.
craisglist.csv| Craigslist rental data used for model estimation.
costar.csv| Commercial building data used for model estimation.
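core_datasets.h5 is read back through pandas' HDFStore; a sketch of the pattern used in datasources.py, with an illustrative path:

```python
import pandas as pd

# datasources.py exposes the store as an orca injectable and each
# dataset as an orca table that reads one key, e.g. store['parcels']
store = pd.HDFStore(
    "inputs/basis_inputs/parcels_buildings_agents/core_datasets.h5", mode="r")
print(store.keys())  # e.g. ['/parcels', '/buildings', '/households', '/jobs', '/residential_units']
parcels = store["parcels"]
store.close()
```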

### plan_strategies (optional)/
**name**|**description**

From 7c12eb83064586be3f38f3e3247fa3fd8130ffc5 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Mon, 18 Sep 2023 14:27:57 -0700
Subject: [PATCH 39/49] fixes

---
 baus.py             | 2 --
 baus/datasources.py | 9 ++++-----
 baus/variables.py   | 2 +-
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/baus.py b/baus.py
index b032a06dc..7e9b7f55f 100644
--- a/baus.py
+++ b/baus.py
@@ -53,8 +53,6 @@
 orca.add_injectable("final_year", OUT_YEAR)

-orca.add_injectable("year", orca.get_injectable("iter_var"))
-
 orca.add_injectable("slack_enabled", SLACK)

diff --git a/baus/datasources.py b/baus/datasources.py
index a2038f0dd..938524827 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -27,7 +27,7 @@ def run_setup():

 @orca.injectable('run_name', cache=True)
 def run_name(run_setup):
-    return os.path.join(run_setup["run_name"])
+    return run_setup["run_name"]

 @orca.injectable('inputs_dir', cache=True)
@@ -153,7 +153,7 @@ def year():
     except Exception as e:
         pass
     # if we're not running simulation, return base year
-    return 2014
+    return 2020

 @orca.injectable()
@@ -374,9 +374,8 @@ def add_drop_helper(col, val):

 @orca.table(cache=True)
-def parcels():
-    df = store['parcels']
-    return df.set_index("geo_id")
+def parcels(store):
+    return store['parcels']

 @orca.table(cache=True)
diff --git a/baus/variables.py b/baus/variables.py
index 8ebcddbe9..ec09ca521 100644
--- a/baus/variables.py
+++ b/baus/variables.py
@@ -147,7 +147,7 @@ def vacant_res_units(buildings, households):

 @orca.column('buildings', cache=True)
-def sqft_per_job(buildings, building_sqft_per_job, sqft_per_job_adjusters, telecommute_sqft_per_job_adjusters, travel_model_zones
+def sqft_per_job(buildings, building_sqft_per_job, sqft_per_job_adjusters, telecommute_sqft_per_job_adjusters, travel_model_zones,
                  base_year, year, run_setup):

     sqft_per_job = buildings.building_type.fillna("O").map(building_sqft_per_job)

From 98a44464329124e8b4a5553052e705dd91338a9b Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Wed, 20 Sep 2023 07:45:11 -0700
Subject: [PATCH 40/49] add store to table definitions

---
 baus/datasources.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index 938524827..5293b9326 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -492,22 +492,22 @@

 @orca.table(cache=True)
-def jobs():
+def jobs(store):
     return store['jobs']

 @orca.table(cache=True)
-def households():
+def households(store):
     return store['households']

 @orca.table(cache=True)
-def buildings():
+def buildings(store):
     return store['buildings']

 @orca.table(cache=True)
-def residential_units():
+def residential_units(store):
     return store['residential_units']

From 0e24d2e4dcdc49713948035002614c2463c1b5cb Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Wed, 20 Sep 2023 09:10:55 -0700
Subject: [PATCH 41/49] update settings needed to do some checks (need to be removed)
--- configs/developer/developer_settings.yaml | 44 +++++------------------ 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/configs/developer/developer_settings.yaml b/configs/developer/developer_settings.yaml index 6b0cbde65..2b8f3c62a 100644 --- a/configs/developer/developer_settings.yaml +++ b/configs/developer/developer_settings.yaml @@ -131,79 +131,53 @@ form_to_btype: # this maps Synthicity's 25 employment categories to - # the six major employment categories traditionally - # used by MTC and ABAG for use in output to the Travel Model naics_to_empsix: - - 10: AGREMPN - - 11: AGREMPN - - 21: AGREMPN - - 22: MWTEMPN - - 23: OTHEMPN - - 31: MWTEMPN - - 3133: MWTEMPN - - 32: MWTEMPN - - 33: MWTEMPN - - 42: MWTEMPN - - 44: RETEMPN - - 4445: RETEMPN - - 45: RETEMPN - - 48: MWTEMPN - - 4849: MWTEMPN - - 49: MWTEMPN - - 51: OTHEMPN - - 52: FPSEMPN - - 53: FPSEMPN - - 54: FPSEMPN - - 55: FPSEMPN - - 56: FPSEMPN - - 61: HEREMPN - - 62: HEREMPN - - 71: HEREMPN - - 72: HEREMPN - - 81: OTHEMPN - - 91: OTHEMPN - - 92: OTHEMPN - - 99: OTHEMPN - - 3133: MWTEMPN - - 4445: RETEMPN - - 4849: MWTEMPN +empsix_name_to_id: + AGREMPN: 1 + MWTEMPN: 2 + RETEMPN: 3 + FPSEMPN: 4 + HEREMPN: 5 + OTHEMPN: 6 + + # convert square meters to square feet parcel_size_factor: 10.764 \ No newline at end of file From 83e5e73f87814f7a44a808097844fb5e0a5472c1 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 20 Sep 2023 09:57:51 -0700 Subject: [PATCH 42/49] fix merge overrides --- baus.py | 55 +----- baus/datasources.py | 338 ++++++++----------------------------- baus/preprocessing.py | 383 ------------------------------------------ 3 files changed, 69 insertions(+), 707 deletions(-) delete mode 100644 baus/preprocessing.py diff --git a/baus.py b/baus.py index f4134eca3..213947eeb 100644 --- a/baus.py +++ b/baus.py @@ -96,56 +96,6 @@ def run_models(MODE): elif MODE == "simulation": - - def get_baseyear_models(): - - baseyear_models = [ - - "slr_inundate", - "slr_remove_dev", - "eq_code_buildings", - "earthquake_demolish", - - "neighborhood_vars", - "regional_vars", - - "rsh_simulate", - "rrh_simulate", - "nrh_simulate", - "assign_tenure_to_new_units", - - "household_relocation", - "households_transition", - - "reconcile_unplaced_households", - "jobs_transition", - - "hlcm_owner_lowincome_simulate", - "hlcm_renter_lowincome_simulate", - - "hlcm_owner_simulate", - "hlcm_renter_simulate", - - "hlcm_owner_simulate_no_unplaced", - "hlcm_owner_lowincome_simulate_no_unplaced", - "hlcm_renter_simulate_no_unplaced", - "hlcm_renter_lowincome_simulate_no_unplaced", - - "reconcile_placed_households", - - "elcm_simulate", - - "price_vars"] - - if not run_setup["run_slr"]: - baseyear_models.remove("slr_inundate") - baseyear_models.remove("slr_remove_dev") - - if not run_setup["run_eq"]: - baseyear_models.remove("eq_code_buildings") - baseyear_models.remove("earthquake_demolish") - - return baseyear_models def get_baseyear_summary_models(): @@ -358,10 +308,9 @@ def get_simulation_visualization_models(): return simulation_visualization_models - baseyear_models = get_baseyear_models() if run_setup["run_summaries"]: - baseyear_models.extend(get_baseyear_summary_models()) - orca.run(baseyear_models, iter_vars=[IN_YEAR]) + baseyear_summary_models = get_baseyear_summary_models() + orca.run(baseyear_summary_models, iter_vars=[IN_YEAR]) years_to_run = range(IN_YEAR+EVERY_NTH_YEAR, OUT_YEAR+1, EVERY_NTH_YEAR) simulation_models = get_simulation_models() diff --git a/baus/datasources.py b/baus/datasources.py index 
cc6c98ad7..d47735fa9 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -7,8 +7,6 @@ from urbansim_defaults import utils from urbansim.utils import misc import orca -from baus import preprocessing -from baus.utils import geom_id_to_parcel_id, parcel_id_to_geom_id from baus.utils import nearest_neighbor import yaml @@ -81,12 +79,6 @@ def account_strategies(): def development_caps(): with open(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/existing_policy/development_caps.yaml")) as f: return yaml.load(f) - - -@orca.injectable('data_edits', cache=True) -def data_edits(): - with open(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/data_edits.yaml")) as f: - return yaml.load(f) @orca.injectable('development_caps_asserted', cache=True) @@ -125,12 +117,6 @@ def preservation(): return yaml.load(f) -@orca.injectable('mapping', cache=True) -def mapping(): - with open(os.path.join(misc.configs_dir(), "mapping.yaml")) as f: - return yaml.load(f) - - @orca.injectable('cost_shifters', cache=True) def cost_shifters(): with open(os.path.join(misc.configs_dir(), "adjusters/cost_shifters.yaml")) as f: @@ -154,15 +140,15 @@ def price_settings(): # this just adds some of the BAUS settings to a master "settings", since the urbansim code looks for them there @orca.injectable("settings") -def settings(mapping, transition_relocation_settings): - settings = mapping.copy() +def settings(developer_settings, transition_relocation_settings): + settings = developer_settings.copy() settings.update(transition_relocation_settings) return settings @orca.injectable("building_type_map") -def building_type_map(mapping): - return mapping["building_type_map"] +def building_type_map(developer_settings): + return developer_settings["building_type_map"] @orca.injectable('year') @@ -172,7 +158,7 @@ def year(): except Exception as e: pass # if we're not running simulation, return base year - return 2014 + return 2020 @orca.injectable() @@ -196,8 +182,8 @@ def final_year(): @orca.injectable(cache=True) -def store(paths): - return pd.HDFStore(os.path.join(orca.get_injectable("inputs_dir"), paths["store"])) +def store(): + return pd.HDFStore(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/core_datasets.h5")) @orca.injectable(cache=True) @@ -259,24 +245,6 @@ def building_sqft_per_job(developer_settings): return developer_settings['building_sqft_per_job'] -@orca.step() -def fetch_from_s3(paths): - import boto - # fetch files from s3 based on config in settings.yaml - s3_settings = paths["s3_settings"] - - conn = boto.connect_s3() - bucket = conn.get_bucket(s3_settings["bucket"], validate=False) - - for file in s3_settings["files"]: - file = os.path.join("data", file) - if os.path.exists(file): - continue - print("Downloading " + file) - key = bucket.get_key(file, validate=False) - key.get_contents_to_filename(file) - - # key locations in the Bay Area for use as attractions in the models @orca.table(cache=True) def landmarks(): @@ -284,12 +252,6 @@ def landmarks(): index_col="name") -@orca.table(cache=True) -def baseyear_taz_controls(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/baseyear_taz_controls.csv"), - dtype={'taz1454': np.int64}, index_col="taz1454") - - @orca.table(cache=True) def base_year_summary_taz(mapping): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "zone_forecasts/baseyear_taz_summaries.csv"), @@ -301,7 +263,7 @@ def base_year_summary_taz(mapping): # 
non-residential rent data @orca.table(cache=True) -def costar(store, parcels): +def costar(parcels): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'basis_inputs/parcels_buildings_agents/2015_08_29_costar.csv')) df["PropertyType"] = df.PropertyType.replace("General Retail", "Retail") @@ -319,26 +281,8 @@ def costar(store, parcels): @orca.table(cache=True) -def zoning_lookup(): - - file = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/2020_11_05_zoning_lookup_hybrid_pba50.csv") - print('Version of zoning_lookup: {}'.format(file)) - - return pd.read_csv(file, dtype={'id': np.int64}, index_col='id') - - -@orca.table(cache=True) -def zoning_existing(parcels, zoning_lookup): - - file = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/zoning/2020_11_05_zoning_parcels_hybrid_pba50.csv") - print('Version of zoning_parcels: {}'.format(file)) - - df = pd.read_csv(file, dtype={'geom_id': np.int64, 'PARCEL_ID': np.int64, 'zoning_id': np.int64}, index_col="geom_id") - df = pd.merge(df, zoning_lookup.to_frame(), left_on="zoning_id", right_index=True) - - df = geom_id_to_parcel_id(df, parcels) - - return df +def zoning_existing(): + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/existing_policy/boc.csv") @orca.table(cache=True) @@ -354,26 +298,8 @@ def proportional_gov_ed_jobs_forecast(): @orca.table(cache=True) -def new_tpp_id(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/tpp_id_2016.csv"), - index_col="parcel_id") - - -@orca.table(cache=True) -def maz(): - maz = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/maz_geography.csv"), - dtype={'MAZ': np.int64, 'TAZ': np.int64}) - maz = maz.drop_duplicates('MAZ').set_index('MAZ') - taz1454 = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/maz22_taz1454.csv"), - dtype={'maz': np.int64, 'TAZ1454': np.int64}, index_col='maz') - maz['taz1454'] = taz1454.TAZ1454 - return maz - - -@orca.table(cache=True) -def parcel_to_maz(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/2020_08_17_parcel_to_maz22.csv"), - dtype={'PARCEL_ID': np.int64, 'maz': np.int64}, index_col="PARCEL_ID") +def travel_model_zones(): + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones.csv")) @orca.table(cache=True) @@ -438,11 +364,11 @@ def tm1_tm2_maz_forecast_inputs(tm1_tm2_regional_demographic_forecast): @orca.table(cache=True) -def zoning_strategy(parcels_geography, mapping): +def zoning_strategy(growth_geographies, developer_settings): strategy_zoning = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'plan_strategies/zoning_mods.csv')) - for k in mapping["building_type_map"].keys(): + for k in developer_settings["building_type_map"].keys(): strategy_zoning[k] = np.nan def add_drop_helper(col, val): @@ -458,22 +384,12 @@ def add_drop_helper(col, val): join_col = 'zoningmodcat' print('join_col of zoningmods is {}'.format(join_col)) - return pd.merge(parcels_geography.to_frame().reset_index(), strategy_zoning, on=join_col, how='left').set_index('parcel_id') + return pd.merge(growth_geographies.to_frame().reset_index(), strategy_zoning, on=join_col, how='left').set_index('parcel_id') @orca.table(cache=True) def parcels(store): df = store['parcels'] - # add a lat/lon to synthetic parcels to avoid a Pandana error - df.loc[2054503, "x"] = -122.1697 - df.loc[2054503, "y"] = 37.4275 - 
df.loc[2054504, "x"] = -122.1697 - df.loc[2054504, "y"] = 37.4275 - df.loc[2054505, "x"] = -122.1697 - df.loc[2054505, "y"] = 37.4275 - df.loc[2054506, "x"] = -122.1697 - df.loc[2054506, "y"] = 37.4275 - return df @orca.table(cache=True) @@ -481,53 +397,10 @@ def parcels_zoning_calculations(parcels): return pd.DataFrame(index=parcels.index) -@orca.table() -def taz(zones): - return zones - - @orca.table(cache=True) -def parcels_geography(parcels): - - file = os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/2021_02_25_parcels_geography.csv") - print('Versin of parcels_geography: {}'.format(file)) - df = pd.read_csv(file, dtype={'PARCEL_ID': np.int64, 'geom_id': np.int64, 'jurisdiction_id': np.int64},index_col="geom_id") - df = geom_id_to_parcel_id(df, parcels) - - # this will be used to map juris id to name - juris_name = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/census_id_to_name.csv"), - dtype={'census_id': np.int64}, index_col="census_id").name10 - - df["juris_name"] = df.jurisdiction_id.map(juris_name) - - df.loc[2054504, "juris_name"] = "Marin County" - df.loc[2054505, "juris_name"] = "Santa Clara County" - df.loc[2054506, "juris_name"] = "Marin County" - df.loc[572927, "juris_name"] = "Contra Costa County" - - # assert no empty juris values - assert True not in df.juris_name.isnull().value_counts() - - df["pda_id"] = df.pda_id.str.lower() - df["gg_id"] = df.gg_id.str.lower() - df["tra_id"] = df.tra_id.str.lower() - df['juris_tra'] = df.juris + '-' + df.tra_id - df["ppa_id"] = df.ppa_id.str.lower() - df['juris_ppa'] = df.juris + '-' + df.ppa_id - df["sesit_id"] = df.sesit_id.str.lower() - df['juris_sesit'] = df.juris + '-' + df.sesit_id - - df['coc_id'] = df.coc_id.str.lower() - df['juris_coc'] = df.juris + '-' + df.coc_id - - return df - - -@orca.table(cache=True) -def parcels_subzone(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'basis_inputs/crosswalks/2020_08_17_parcel_to_taz1454sub.csv'), - usecols=['taz_sub', 'PARCEL_ID', 'county'], dtype={'PARCEL_ID': np.int64}, index_col='PARCEL_ID') - +def growth_geographies(): + return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies.csv") + @orca.table(cache=False) def mandatory_accessibility(year, run_setup): @@ -584,136 +457,71 @@ def accessibilities_segmentation(year, run_setup): return df -@orca.table(cache=True) -def manual_edits(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/manual_edits.csv")) - - -@orca.table(cache=True) -def parcel_rejections(): - url = "https://forecast-feedback.firebaseio.com/parcelResults.json" - return pd.read_json(url, orient="index").set_index("geomId") - - -def reprocess_dev_projects(df): - # if dev projects with the same parcel id have more than one build - # record, we change the later ones to add records - we don't want to - # constantly be redeveloping projects, but it's a common error for users - # to make in their development project configuration - df = df.sort_values(["geom_id", "year_built"]) - prev_geom_id = None - for index, rec in df.iterrows(): - if rec.geom_id == prev_geom_id: - df.loc[index, "action"] = "add" - prev_geom_id = rec.geom_id - - return df - - # shared between demolish and build tables below -def get_dev_projects_table(parcels): +def get_dev_projects_table(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/2021_0309_1939_development_projects.csv"), 
- dtype={'PARCEL_ID': np.int64, 'geom_id': np.int64}) - df = reprocess_dev_projects(df) - orca.add_injectable("devproj_len", len(df)) - - df = df.dropna(subset=['geom_id']) - - cnts = df.geom_id.isin(parcels.geom_id).value_counts() - if False in cnts.index: - print("%d MISSING GEOMIDS!" % cnts.loc[False]) - - df = df[df.geom_id.isin(parcels.geom_id)] - - geom_id = df.geom_id # save for later - df = df.set_index("geom_id") - df = geom_id_to_parcel_id(df, parcels).reset_index() # use parcel id - df["geom_id"] = geom_id.values # add it back again cause it goes away - orca.add_injectable("devproj_len_geomid", len(df)) - + "basis_inputs/parcels_buildings_agents/dev_pipeline.csv")) + df = df.set_index("parcel_id") return df @orca.table(cache=True) -def demolish_events(parcels): - df = get_dev_projects_table(parcels) - +def demolish_events(): + df = get_dev_projects_table() # keep demolish and build records + # build records will be used to demolish the existing building on a parcel where a pipeline project is occuring + # demolish events will be demolished return df[df.action.isin(["demolish", "build"])] @orca.table(cache=True) -def development_projects(parcels, mapping): - df = get_dev_projects_table(parcels) - - for col in [ - 'residential_sqft', 'residential_price', 'non_residential_rent']: - df[col] = 0 - df["redfin_sale_year"] = 2012 # default base year - df["redfin_sale_price"] = np.nan # null sales price - df["stories"] = df.stories.fillna(1) - df["building_sqft"] = df.building_sqft.fillna(0) - df["non_residential_sqft"] = df.non_residential_sqft.fillna(0) - df["residential_units"] = df.residential_units.fillna(0).astype("int") - df["preserved_units"] = 0.0 - df["inclusionary_units"] = 0.0 - df["subsidized_units"] = 0.0 - - df["building_type"] = df.building_type.replace("HP", "OF") - df["building_type"] = df.building_type.replace("GV", "OF") - df["building_type"] = df.building_type.replace("SC", "OF") - - building_types = mapping["building_type_map"].keys() - # only deal with building types we recorgnize - # otherwise hedonics break - # currently: 'HS', 'HT', 'HM', 'OF', 'HO', 'SC', 'IL', - # 'IW', 'IH', 'RS', 'RB', 'MR', 'MT', 'ME', 'PA', 'PA2' - df = df[df.building_type.isin(building_types)] - - # we don't predict prices for schools and hotels right now - df = df[~df.building_type.isin(["SC", "HO"])] - - # need a year built to get built - df = df.dropna(subset=["year_built"]) +def development_projects(): + df = get_dev_projects_table() + # keep add and build records + # build records will be built on a parcel + # add records will be added to a parcel where a building already exists df = df[df.action.isin(["add", "build"])] - orca.add_injectable("devproj_len_proc", len(df)) - print("Describe of development projects") - # this makes sure dev projects has all the same columns as buildings - # which is the point of this method print(df[orca.get_table('buildings').local_columns].describe()) return df -def print_error_if_not_available(store, table): - if table not in store: - raise Exception( - "%s not found in store - you need to preprocess" % table + - " the data with:\n python baus.py --mode preprocessing -c") - return store[table] +@orca.table(cache=True) +def dev_pipeline_strategy_projects(run_setup, development_projects): + + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/dev_pipeline_strategy_projects.csv")) + df = df.set_index("parcel_id") + + if run_setup["dev_pipeline_strategy_projects"]: + dp = development_projects.to_frame() + # should error if 
the columns don't match the dev pipeline columns
+        dp = dp.append(df)
+        # should all be add/build
+        dp = dp[dp.action.isin(["add", "build"])]
+
+    return dp


 @orca.table(cache=True)
 def jobs(store):
-    return print_error_if_not_available(store, 'jobs_preproc')
+    return store['jobs']


 @orca.table(cache=True)
 def households(store):
-    return print_error_if_not_available(store, 'households_preproc')
+    return store['households']


 @orca.table(cache=True)
 def buildings(store):
-    return print_error_if_not_available(store, 'buildings_preproc')
+    return store['buildings']


 @orca.table(cache=True)
-def residential_units(store):
-    return print_error_if_not_available(store, 'residential_units_preproc')
+def residential_units(store)
+    return store['residential_units']


 @orca.table(cache=True)
@@ -783,11 +591,6 @@ def vmt_fee_categories():
                        index_col="taz")


-@orca.table(cache=True)
-def superdistricts_geography():
-    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/superdistricts_geography.csv"), index_col="number")
-
-
 @orca.table(cache=True)
 def sqft_per_job_adjusters():
     return pd.read_csv(os.path.join(misc.configs_dir(), "adjusters/sqft_per_job_adjusters.csv"), index_col="number")
@@ -798,22 +601,6 @@ def telecommute_sqft_per_job_adjusters():
     return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/telecommute_sqft_per_job_adjusters.csv"), index_col="number")


-@orca.table(cache=True)
-def taz_geography(superdistricts_geography, mapping):
-    tg = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/taz_geography.csv"),
-                     dtype={'zone': np.int64, 'superdistrcit': np.int64, 'county': np.int64}, index_col="zone")
-    cmap = mapping["county_id_tm_map"]
-    tg['county_name'] = tg.county.map(cmap)
-
-    # we want "subregion" geography on the taz_geography table
-    # we have to go get it from the superdistricts_geography table and join
-    # using the superdistrcit id
-    tg["subregion_id"] = superdistricts_geography.subregion.loc[tg.superdistrict].values
-    tg["subregion"] = tg.subregion_id.map({1: "Core", 2: "Urban", 3: "Suburban", 4: "Rural"})
-
-    return tg
-
-
 # these are shapes - "zones" in the bay area
 @orca.table(cache=True)
 def zones(store):
@@ -828,14 +615,23 @@ def slr_progression():

 # SLR inundation levels for parcels
-# if slr is activated, there is either a committed projects mitigation applied
-# or a committed projects + policy projects mitigation applied
 @orca.table(cache=True)
 def slr_parcel_inundation():
     return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_parcel_inundation.csv"),
                        dtype={'parcel_id': np.int64}, index_col='parcel_id')


+@orca.table(cache=True)
+def slr_committed_mitigation():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_committed_mitigation.csv"),
+                       index_col='parcel_id')
+
+
+@orca.table(cache=True)
+def slr_strategy_mitigation():
+    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_strategy_mitigation.csv"),
+                       index_col='parcel_id')
+
+
 # census tracts for parcels, to assign earthquake probabilities
 @orca.table(cache=True)
 def parcels_tract():
@@ -853,12 +649,6 @@ def tracts_earthquake():
         os.path.join(orca.get_injectable("inputs_dir"), "tract_damage_earthquake.csv"))


-# override urbansim_defaults which looks for this in data/
-@orca.table(cache=True)
-def logsums():
-    return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "accessibility/pandana/logsums.csv"), index_col="taz")
-
-
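For orientation, the store-backed and csv-backed loaders this patch leaves behind all follow one small registration pattern. A minimal, self-contained sketch of that pattern (every name below is hypothetical, not a table from this repo):

    import orca
    import pandas as pd

    # stand-in for the injected pd.HDFStore; a plain dict keeps the sketch runnable
    orca.add_injectable("example_store", {"example_key": pd.DataFrame({"x": [1, 2]})})

    @orca.table(cache=True)  # cache=True: the table is built once and reused all run
    def example_store_table(example_store):
        return example_store["example_key"]

    @orca.table(cache=True)  # csv-backed loaders key the frame by an id column on read
    def example_csv_table():
        return pd.DataFrame({"parcel_id": [1, 2], "flag": [0, 1]}).set_index("parcel_id")

    print(orca.get_table("example_store_table").to_frame())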
@orca.table(cache=True) def employment_relocation_rates(): df = pd.read_csv(os.path.join(misc.configs_dir(), "transition_relocation/employment_relocation_rates.csv")) @@ -891,6 +681,12 @@ def accessory_units(): return df +@orca.table(cache=True) +def nodev_sites(): + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites.csv"), index_col="parcel_id") + return df + + # parcels-tract crosswalk that match the Urban Displacement Project census tract vintage @orca.table(cache=True) def parcel_tract_crosswalk(): @@ -924,7 +720,7 @@ def eq_retrofit_lookup(): # this specifies the relationships between tables orca.broadcast('buildings', 'residential_units', cast_index=True, onto_on='building_id') orca.broadcast('residential_units', 'households', cast_index=True, onto_on='unit_id') -orca.broadcast('parcels_geography', 'buildings', cast_index=True, onto_on='parcel_id') +orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='parcel_id') orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id') # adding orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id') diff --git a/baus/preprocessing.py b/baus/preprocessing.py deleted file mode 100644 index c38502353..000000000 --- a/baus/preprocessing.py +++ /dev/null @@ -1,383 +0,0 @@ -from __future__ import print_function - -import orca -import pandas as pd -from urbansim.utils import misc -from baus.tests import validation - - -# TO ADD: Housing Unit imputation -# We want to match the target in baseyear_taz_controls.csv - -# TO ADD: Nonresidential space imputation -# We want to match the target in baseyear_taz_controls.csv - - -# the way this works is there is an orca step to do jobs allocation, which -# reads base year totals and creates jobs and allocates them to buildings, -# and writes it back to the h5. then the actual jobs table above just reads -# the auto-allocated version from the h5. 
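A distilled, runnable sketch of the weighted-sampling assignment the deleted allocate_jobs step performed (toy job-space numbers for a single TAZ, not model data):

    import pandas as pd

    # toy data: non-residential job spaces per building within one zone
    job_spaces = pd.Series({100: 10, 101: 0, 102: 5, 103: 35})
    candidates = job_spaces[job_spaces > 0]

    # draw a building id for each of 20 jobs, weighted by available spaces;
    # replace=True allows temporary overfilling, reconciled by a later step
    weights = candidates / candidates.sum()
    assigned = candidates.sample(20, replace=True, weights=weights, random_state=0)
    building_ids = assigned.index.values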
was hoping to just do allocation -# on the fly but it takes about 4 minutes so way to long to do on the fly - - -def allocate_jobs(baseyear_taz_controls, mapping, buildings, parcels): - # this does a new assignment from the controls to the buildings - - # first disaggregate the job totals - sector_map = mapping["naics_to_empsix"] - jobs = [] - for taz, row in baseyear_taz_controls.local.iterrows(): - for sector_col, num in row.iteritems(): - - # not a sector total - if not sector_col.startswith("emp_sec"): - continue - - # get integer sector id - sector_id = int(''.join(c for c in sector_col if c.isdigit())) - sector_name = sector_map[sector_id] - - jobs += [[sector_id, sector_name, taz, -1]] * int(num) - - df = pd.DataFrame(jobs, columns=[ - 'sector_id', 'empsix', 'taz', 'building_id']) - - zone_id = misc.reindex(parcels.zone_id, buildings.parcel_id) - - # just do random assignment weighted by job spaces - we'll then - # fill in the job_spaces if overfilled in the next step (code - # has existed in urbansim for a while) - for taz, cnt in df.groupby('taz').size().iteritems(): - - potential_add_locations = buildings.non_residential_sqft[ - (zone_id == taz) & - (buildings.non_residential_sqft > 0)] - - if len(potential_add_locations) == 0: - # if no non-res buildings, put jobs in res buildings - potential_add_locations = buildings.building_sqft[ - zone_id == taz] - - weights = potential_add_locations / potential_add_locations.sum() - - if len(potential_add_locations) > 0: - buildings_ids = potential_add_locations.sample( - cnt, replace=True, weights=weights) - - df["building_id"][df.taz == taz] = buildings_ids.index.values - - else: - # no locations for jobs; needs to be dealt with on the data side - print("ERROR in TAZ {}: {} jobs, {} potential locations".format( - taz, cnt, len(potential_add_locations))) - - s = zone_id.loc[df.building_id].value_counts() - # assert that we at least got the total employment right after assignment - # 07/27/2020 ET: re-enabling this assertion - # see: https://github.com/BayAreaMetro/bayarea_urbansim/issues/199 - assert_series_equal(baseyear_taz_controls.emp_tot, s) - print("Jobs to assign: {}".format(baseyear_taz_controls.emp_tot.sum())) - print("Jobs assigned: {}".format(s.sum())) - - return df - - -@orca.step() -def move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs_df): - # need to move jobs from portola valley to san mateo county - NUM_IN_PORTOLA = 1500 - - juris = misc.reindex(parcels.juris, misc.reindex(buildings.parcel_id, jobs_df.building_id)) - - # find jobs in portols valley to move - portola = jobs_df[juris == "Portola Valley"] - move = portola.sample(len(portola) - NUM_IN_PORTOLA) - - # find places in san mateo to which to move them - san_mateo = jobs_df[juris == "San Mateo County"] - move_to = san_mateo.sample(len(move)) - - jobs_df.loc[move.index, "building_id"] = move_to.building_id.values - - return jobs_df - - -@orca.step() -def preproc_jobs(store, baseyear_taz_controls, mapping, parcels): - buildings = store['buildings'] - - jobs = allocate_jobs(baseyear_taz_controls, mapping, buildings, parcels) - jobs = move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs) - store['jobs_preproc'] = jobs - - -@orca.step() -def preproc_households(store): - - df = store['households'] - - df['tenure'] = df.hownrent.map({1: 'own', 2: 'rent'}) - - # need to keep track of base year income quartiles for use in the - # transition model - even caching doesn't work because when you add - # rows via the transitioning, you automatically 
clear the cache! - # this is pretty nasty and unfortunate - df["base_income_quartile"] = pd.Series(pd.qcut(df.income, 4, labels=False), index=df.index).add(1) - df["base_income_octile"] = pd.Series(pd.qcut(df.income, 8, labels=False), index=df.index).add(1) - - # there are some overrides where we move households around in order - # to match the city totals - in the future we will resynthesize and this - # can go away - this csv is generated by scripts/match_city_totals.py - overrides = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/edits/household_building_id_overrides.csv"), - index_col="household_id").building_id - df.loc[overrides.index, "building_id"] = overrides.values - - # turns out we need 4 more households - new_households = df.loc[[1132542, 1306618, 950630, 886585]].reset_index() - # keep unique index - new_households.index += pd.Series(df.index).max() + 1 - df = df.append(new_households) - - store['households_preproc'] = df - - -def assign_deed_restricted_units(df, parcels): - - df["deed_restricted_units"] = 0 - - zone_ids = misc.reindex(parcels.zone_id, df.parcel_id).\ - reindex(df.index).fillna(-1) - # sample deed restricted units to match current deed restricted unit - # zone totals - for taz, row in pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), - "basis_inputs/parcels_buildings_agents/deed_restricted_zone_totals.csv"), - index_col='taz_key').iterrows(): - - cnt = row["units"] - - if cnt <= 0: - continue - - potential_add_locations = df.residential_units[ - (zone_ids == taz) & - (df.residential_units > 0)] - - assert len(potential_add_locations) > 0 - - weights = potential_add_locations / potential_add_locations.sum() - - buildings_ids = potential_add_locations.sample( - cnt, replace=True, weights=weights) - - units = pd.Series(buildings_ids.index.values).value_counts() - df.loc[units.index, "deed_restricted_units"] += units.values - - print("Total deed restricted units after random selection: %d" % - df.deed_restricted_units.sum()) - - df["deed_restricted_units"] = \ - df[["deed_restricted_units", "residential_units"]].min(axis=1) - - print("Total deed restricted units after truncating to res units: %d" % - df.deed_restricted_units.sum()) - - return df - - -@orca.step() -def correct_baseyear_vacancies(buildings, parcels, jobs, store): - # sonoma county has too much vacancy in the buildings so we're - # going to lower it a bit to match job totals - I'm doing it here - # as opposed to in datasources as it requires registered orca - # variables - - ''' - These are the original vacancies - Alameda 0.607865 - Contra Costa 0.464277 - Marin 0.326655 - Napa 0.427900 - San Francisco 0.714938 - San Mateo 0.285090 - Santa Clara 0.368031 - Solano 0.383663 - Sonoma 0.434263 - ''' - - # get buildings by county - buildings_county = misc.reindex(parcels.county, buildings.parcel_id) - buildings_juris = misc.reindex(parcels.juris, buildings.parcel_id) - - # this is the maximum vacancy you can have any a building so it NOT the - # same thing as setting the vacancy for the entire county - SURPLUS_VACANCY_COUNTY = buildings_county.map({ - "Alameda": .42, - "Contra Costa": .57, - "Marin": .28, - "Napa": .7, - "San Francisco": .08, - "San Mateo": .4, - "Santa Clara": .32, - "Solano": .53, - "Sonoma": .4 - }).fillna(.2) - - SURPLUS_VACANCY_JURIS = buildings_juris.map({ - "Berkeley": .65, - "Atherton": 0.05, - "Belvedere": 0, - "Corte Madera": 0, - "Cupertino": .1, - "Healdsburg": 0, - "Larkspur": 0, - "Los Altos Hills": 0, - "Los Gatos": 0, - "Monte Sereno": 
0, - "Piedmont": 0, - "Portola Valley": 0, - "Ross": 0, - "San Anselmo": 0, - "Saratoga": 0, - "Woodside": 0, - "Alameda": .2 - }) - - SURPLUS_VACANCY = pd.DataFrame([ - SURPLUS_VACANCY_COUNTY, SURPLUS_VACANCY_JURIS]).min() - - # count of jobs by building - job_counts_by_building = jobs.building_id.value_counts().\ - reindex(buildings.index).fillna(0) - - # with SURPLUS_VACANCY vacancy - job_counts_by_building_surplus = \ - (job_counts_by_building * (SURPLUS_VACANCY+1)).astype('int') - - # min of job spaces and vacancy - correct_job_spaces = pd.DataFrame([ - job_counts_by_building_surplus, buildings.job_spaces]).min() - - # convert back to non res sqft because job spaces is computed - correct_non_res_sqft = correct_job_spaces * buildings.sqft_per_job - - buildings.update_col("non_residential_sqft", correct_non_res_sqft) - - jobs_county = misc.reindex(buildings_county, jobs.building_id) - - print("Vacancy rate by county:\n", - buildings.job_spaces.groupby(buildings_county).sum() / - jobs_county.value_counts() - 1.0) - - jobs_juris = misc.reindex(buildings_juris, jobs.building_id) - - s = buildings.job_spaces.groupby(buildings_juris).sum() / \ - jobs_juris.value_counts() - 1.0 - print("Vacancy rate by juris:\n", s.to_string()) - - return buildings - - -@orca.step() -def preproc_buildings(store, parcels, manual_edits): - # start with buildings from urbansim_defaults - df = store['buildings'] - - # add source of buildings data (vs pipeline, developer model) - df['source'] = 'h5_inputs' - - # this is code from urbansim_defaults - df["residential_units"] = pd.concat( - [df.residential_units, - store.households_preproc.building_id.value_counts()], - axis=1).max(axis=1) - - df["preserved_units"] = 0.0 - df["inclusionary_units"] = 0.0 - df["subsidized_units"] = 0.0 - - # XXX need to make sure jobs don't exceed capacity - - # drop columns we don't needed - df = df.drop(['development_type_id', 'improvement_value', - 'sqft_per_unit', 'nonres_rent_per_sqft', - 'res_price_per_sqft', - 'redfin_home_type', 'costar_property_type', - 'costar_rent'], axis=1) - - # apply manual edits - edits = manual_edits.local - edits = edits[edits.table == 'buildings'] - for index, row, col, val in \ - edits[["id", "attribute", "new_value"]].itertuples(): - df.set_value(row, col, val) - - df["residential_units"] = df.residential_units.fillna(0) - - # for some reason nonres can be more than total sqft - df["building_sqft"] = pd.DataFrame({ - "one": df.building_sqft, - "two": df.residential_sqft + df.non_residential_sqft}).max(axis=1) - - df["building_type"] = df.building_type_id.map({ - 0: "O", - 1: "HS", - 2: "HT", - 3: "HM", - 4: "OF", - 5: "HO", - 6: "SC", - 7: "IL", - 8: "IW", - 9: "IH", - 10: "RS", - 11: "RB", - 12: "MR", - 13: "MT", - 14: "ME", - 15: "PA", - 16: "PA2" - }) - - del df["building_type_id"] # we won't use building type ids anymore - - # keeps parking lots from getting redeveloped - df["building_sqft"][df.building_type.isin(["PA", "PA2"])] = 0 - df["non_residential_sqft"][df.building_type.isin(["PA", "PA2"])] = 0 - - # don't know what an other building type id, set to office - df["building_type"] = df.building_type.replace("O", "OF") - - # set default redfin sale year to 2012 - df["redfin_sale_year"] = df.redfin_sale_year.fillna(2012) - - df["residential_price"] = 0.0 - df["non_residential_rent"] = 0.0 - - df = assign_deed_restricted_units(df, parcels) - - store['buildings_preproc'] = df - - # this runs after the others because it needs access to orca-assigned - # columns - in particular is needs access to 
the non-residential sqft and - # job spaces columns - orca.run(["correct_baseyear_vacancies"]) - - -@orca.step() -def baseline_data_checks(store): - # TODO - - # tests to make sure our baseline data edits worked as expected - - # spot check we match controls for jobs at the zonal level - - # spot check portola has 1500 jobs - - # check manual edits are applied - - # check deed restricted units match totals - - # check res units >= households - - # check job spaces >= jobs - pass From 8e0862f30864d04aafc05d78e95b568f046933ad Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 20 Sep 2023 10:14:32 -0700 Subject: [PATCH 43/49] more updates after viz merge --- baus.py | 2 +- baus/datasources.py | 17 +---------------- docs/configuration.md | 8 +------- 3 files changed, 3 insertions(+), 24 deletions(-) diff --git a/baus.py b/baus.py index 213947eeb..d913e4de6 100644 --- a/baus.py +++ b/baus.py @@ -5,7 +5,7 @@ import traceback from baus import \ datasources, variables, models, subsidies, ual, slr, earthquake, \ - utils, preprocessing + utils from baus.tests import validation from baus.summaries import \ core_summaries, geographic_summaries, affordable_housing_summaries, \ diff --git a/baus/datasources.py b/baus/datasources.py index d47735fa9..6161aa17b 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -45,12 +45,6 @@ def viz_dir(run_setup): return os.path.join(run_setup['viz_dir']) -@orca.injectable('paths', cache=True) -def paths(): - with open(os.path.join(misc.configs_dir(), "paths.yaml")) as f: - return yaml.load(f) - - @orca.injectable('accessibility_settings', cache=True) def accessibility_settings(): with open(os.path.join(misc.configs_dir(), "accessibility/accessibility_settings.yaml")) as f: @@ -252,15 +246,6 @@ def landmarks(): index_col="name") -@orca.table(cache=True) -def base_year_summary_taz(mapping): - df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "zone_forecasts/baseyear_taz_summaries.csv"), - dtype={'taz1454': np.int64}, index_col="zone_id") - cmap = mapping["county_id_tm_map"] - df['COUNTY_NAME'] = df.COUNTY.map(cmap) - return df - - # non-residential rent data @orca.table(cache=True) def costar(parcels): @@ -520,7 +505,7 @@ def buildings(store): @orca.table(cache=True) -def residential_units(store) +def residential_units(store): return store['residential_units'] diff --git a/docs/configuration.md b/docs/configuration.md index 5e524717c..157b6083c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -62,10 +62,4 @@ renter_no_unplaced|Renters|-|Renters|Market Rate Rental Units| -----|----- employment_relocation_rates.csv| A file with the probability of a job relocating during a time step in the forecast, by TAZ and by employment sector. Pairs with employment_relocation_rates.csv which overwrites the model probabilities with calibration factors. household_relocation_rates.csv| A file with the probability of a household relocating during a time step in the forecast, by TAZ, income, and tenure. Pairs with renter_protections_relocation_rates_overwrites.csv which overwrites model probabilities with different relocation rates when the renter protections strategy is enabled. -transition_relocation_settings.yaml| Settings for the transition and relocation models. - -### mapping.yaml -Mapping used in the model to relate variables to one another. - -### paths.yaml -Variables that store file names for use in the model code. 
\ No newline at end of file +transition_relocation_settings.yaml| Settings for the transition and relocation models. \ No newline at end of file From f2b4d7bd86f669c2160c0d8977b3fd186d2a8e99 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Wed, 20 Sep 2023 13:13:15 -0700 Subject: [PATCH 44/49] update iter years --- baus.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus.py b/baus.py index d913e4de6..368e8150d 100644 --- a/baus.py +++ b/baus.py @@ -25,7 +25,7 @@ MODE = "simulation" EVERY_NTH_YEAR = 5 -IN_YEAR, OUT_YEAR = 2010, 2050 +IN_YEAR, OUT_YEAR = 2020, 2050 SLACK = "URBANSIM_SLACK" in os.environ From e690670f5fd0cc1d12ba44cdb6b928b001b44344 Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 26 Sep 2023 12:04:30 -0700 Subject: [PATCH 45/49] update model summaries for BAUS Run v1 --- baus.py | 18 +- baus/datasources.py | 48 +- baus/summaries.py | 1604 ----------------- .../summaries/affordable_housing_summaries.py | 30 +- baus/summaries/core_summaries.py | 18 +- baus/summaries/geographic_summaries.py | 44 +- baus/summaries/metrics.py | 73 +- baus/summaries/travel_model_summaries.py | 205 +-- baus/tests/validation.py | 48 +- baus/variables.py | 126 +- configs/developer/developer_settings.yaml | 5 +- 11 files changed, 266 insertions(+), 1953 deletions(-) delete mode 100644 baus/summaries.py diff --git a/baus.py b/baus.py index 368e8150d..0ec28cc1c 100644 --- a/baus.py +++ b/baus.py @@ -101,7 +101,7 @@ def get_baseyear_summary_models(): baseyear_summary_models = [ - "simulation_validation", +# "simulation_validation", "parcel_summary", "building_summary", @@ -113,17 +113,17 @@ def get_baseyear_summary_models(): "geographic_summary", - "growth_geography_metrics", - "deed_restricted_units_metrics", - "household_income_metrics", - "equity_metrics", - "jobs_housing_metrics", +# "growth_geography_metrics", +# "deed_restricted_units_metrics", +# "household_income_metrics", +# "equity_metrics", +# "jobs_housing_metrics", "jobs_metrics", "slr_metrics", - "earthquake_metrics", - "greenfield_metrics", +# "earthquake_metrics", +# "greenfield_metrics", - "taz1_summary", +# "taz1_summary", "maz_marginals", "maz_summary", "taz2_marginals", diff --git a/baus/datasources.py b/baus/datasources.py index 6161aa17b..f8f151a94 100644 --- a/baus/datasources.py +++ b/baus/datasources.py @@ -157,12 +157,7 @@ def year(): @orca.injectable() def initial_year(): - return 2010 - - -@orca.injectable() -def initial_summary_year(): - return 2015 + return 2020 @orca.injectable() @@ -267,7 +262,7 @@ def costar(parcels): @orca.table(cache=True) def zoning_existing(): - return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/existing_policy/boc.csv") + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/existing_policy/boc.csv")) @orca.table(cache=True) @@ -284,7 +279,8 @@ def proportional_gov_ed_jobs_forecast(): @orca.table(cache=True) def travel_model_zones(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones.csv")) + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/travel_model_zones.csv")) + return df.set_index("parcel_id") @orca.table(cache=True) @@ -375,6 +371,7 @@ def add_drop_helper(col, val): @orca.table(cache=True) def parcels(store): df = store['parcels'] + return df.set_index('parcel_id') @orca.table(cache=True) @@ -384,7 +381,8 @@ def parcels_zoning_calculations(parcels): @orca.table(cache=True) def growth_geographies(): 
- return os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies.csv") + df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/crosswalks/growth_geographies.csv")) + return df.set_index("parcel_id") @orca.table(cache=False) @@ -586,13 +584,6 @@ def telecommute_sqft_per_job_adjusters(): return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "plan_strategies/telecommute_sqft_per_job_adjusters.csv"), index_col="number") -# these are shapes - "zones" in the bay area -@orca.table(cache=True) -def zones(store): - # sort index so it prints out nicely when we want it to - return store['zones'].sort_index() - - # SLR progression by year @orca.table(cache=True) def slr_progression(): @@ -601,9 +592,9 @@ def slr_progression(): # SLR inundation levels for parcels @orca.table(cache=True) -def slr_parcel_inundation(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_parcel_inundation.csv"), - dtype={'parcel_id': np.int64}, index_col='parcel_id') +def slr_inundation(): + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/slr_inundation.csv"), + index_col='parcel_id') @orca.table(cache=True) @@ -669,7 +660,7 @@ def accessory_units(): @orca.table(cache=True) def nodev_sites(): df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites.csv"), index_col="parcel_id") - return df + return df.set_index("parcel_id") # parcels-tract crosswalk that match the Urban Displacement Project census tract vintage @@ -684,32 +675,33 @@ def displacement_risk_tracts(): return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/equity/udp_2017results.csv")) -# Urban Displacement Project census tracts +# Communities of Concern census tracts @orca.table(cache=True) def coc_tracts(): return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/equity/COCs_ACS2018_tbl_TEMP.csv")) -# Urban Displacement Project census tracts @orca.table(cache=True) def buildings_w_eq_codes(): return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/buildings_w_earthquake_codes.csv")) -# Urban Displacement Project census tracts @orca.table(cache=True) def eq_retrofit_lookup(): return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/hazards/building_eq_categories.csv")) +@orca.table(cache=True) +def base_year_summary_taz(): + return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "zone_forecasts/baseyear_taz_summaries.csv")) + + # this specifies the relationships between tables orca.broadcast('buildings', 'residential_units', cast_index=True, onto_on='building_id') orca.broadcast('residential_units', 'households', cast_index=True, onto_on='unit_id') -orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='parcel_id') orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id') -# adding orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id') orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id') -# not defined in urbansim_Defaults -orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id') -orca.broadcast('taz_geography', 'parcels', cast_index=True, onto_on='zone_id') \ No newline at end of file +orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='parcel_id') +orca.broadcast('travel_model_zones', 'buildings', 
cast_index=True, onto_on='parcel_id') +orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id') \ No newline at end of file diff --git a/baus/summaries.py b/baus/summaries.py deleted file mode 100644 index 4451fd9fe..000000000 --- a/baus/summaries.py +++ /dev/null @@ -1,1604 +0,0 @@ - -import sys -import os -import orca -import pandas as pd -from pandas.util import testing as pdt -import numpy as np -from baus.utils import random_indexes, round_series_match_target,\ - scale_by_target, simple_ipf, format_df -from urbansim.utils import misc -import urbansim -import urbansim_defaults -import orca -import orca_test -import pandana -from baus.postprocessing import GEO_SUMMARY_LOADER, \ -TWO_GEO_SUMMARY_LOADER, nontaz_calculator, taz_calculator,\ -county_calculator, juris_to_county - -@orca.step() -def environment_config(run_number, parcels, year): - - f = open(os.path.join(orca.get_injectable("outputs_dir"), "run%d_env_configuration.log" % (run_number)), "w") - - def write(s): - # print s - f.write(s + "\n") - - # package versions - write("python version: %s" % sys.version.split('|')[0]) - write("urbansim version: %s" % urbansim.__version__) -# write("urbansim_defaults version: %s" % urbansim_defaults.__version__) - write("orca version: %s" % orca.__version__) - write("orca_test version: %s" % orca_test.__version__) - write("pandana version: %s" % pandana.__version__) - write("numpy version: %s" % np.__version__) - write("pandas version: %s" % pd.__version__) - write("") - - f.close() - - -@orca.step() -def topsheet(households, jobs, buildings, parcels, zones, year, run_number, parcels_zoning_calculations, - summary, parcels_geography, new_tpp_id, residential_units, travel_model_zones, base_year): - - hh_by_subregion = misc.reindex(travel_model_zones.subregion, households.parcel_id).value_counts() - - # Cols for Draft/Final Blueprint and EIR geographies - households_df = orca.merge_tables('households', [parcels_geography, buildings, households], - columns=['pda_id', 'tra_id', 'ppa_id', 'sesit_id', 'income']) - - hh_by_inpda = households_df.pda_id.notnull().value_counts() - hhincome_by_inpda = households_df.income.groupby(households_df.pda_id.notnull()).mean() - # round to nearest 100s - hhincome_by_inpda = (hhincome_by_inpda/100).round()*100 - - hh_by_intra = households_df.tra_id.notnull().value_counts() - hhincome_by_intra = households_df.income.groupby(households_df.tra_id.notnull()).mean() - # round to nearest 100s - hhincome_by_intra = (hhincome_by_intra/100).round()*100 - - hh_by_insesit = households_df.sesit_id.notnull().value_counts() - hhincome_by_insesit = households_df.income.groupby(households_df.sesit_id.notnull()).mean() - # round to nearest 100s - hhincome_by_insesit = (hhincome_by_insesit/100).round()*100 - - jobs_by_subregion = misc.reindex(travel_model_zones.subregion, jobs.parcel_id).value_counts() - - jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs], columns=['pda_id', 'tra_id']) - - jobs_by_inpda = jobs_df.pda_id.notnull().value_counts() - jobs_by_intra = jobs_df.tra_id.notnull().value_counts() - - if year == base_year: - # save some info for computing growth measures - orca.add_injectable("base_year_measures", { - "hh_by_subregion": hh_by_subregion, - "jobs_by_subregion": jobs_by_subregion, - "hh_by_inpda": hh_by_inpda, - "hh_by_intra": hh_by_intra, - "hh_by_insesit": hh_by_insesit, - "jobs_by_inpda": jobs_by_inpda, - "jobs_by_intra": jobs_by_intra, - "hhincome_by_intra": hhincome_by_intra, - "hhincome_by_insesit": hhincome_by_insesit - }) 
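The topsheet below reduces its growth measures to normalize-and-diff arithmetic on count Series; a toy illustration of the share-of-growth calculation (numbers invented for the example):

    import pandas as pd

    base = pd.Series({"Core": 100, "Urban": 80, "Suburban": 60, "Rural": 10})
    forecast = pd.Series({"Core": 140, "Urban": 95, "Suburban": 70, "Rural": 12})

    # "share by subregion" normalizes a count Series to proportions
    share = (forecast / forecast.sum()).round(2)

    # "pct of regional growth" normalizes the base-to-forecast change instead
    growth = forecast - base
    growth_share = (growth / growth.sum()).round(2)  # Core captures ~0.60 here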
- try: - base_year_measures = orca.get_injectable("base_year_measures") - except Exception as e: - # the base year measures don't exist - we didn't run year 2020 - # this can happen when we skip the first year, usually because - # we don't want to waste time doing so - return - - f = open(os.path.join(orca.get_injectable("outputs_dir"), "run%d_topsheet_%d.log" % (run_number, year)), "w") - - def write(s): - # print s - f.write(s + "\n\n") - - def norm_and_round(s): - # normalize and round a series - return str((s/s.sum()).round(2)) - - nhh = len(households) - write("Number of households = %d" % nhh) - nj = len(jobs) - write("Number of jobs = %d" % nj) - - n = len(households.building_id[households.building_id == -1]) - write("Number of unplaced households = %d" % n) - orca.add_injectable("unplaced_hh", n) - - n = len(jobs.building_id[jobs.building_id == -1]) - write("Number of unplaced jobs = %d" % n) - - # we should assert there are no unplaces households and jobs right? - # this is considered an error for the MTC-style model - # could be configured in settings.yaml - - n = buildings.vacant_res_units[buildings.vacant_res_units < 0] - write("Number of overfull buildings = %d" % len(n)) - write("Number of vacant units in overfull buildings = %d" % n.sum()) - - du = buildings.residential_units.sum() - write("Number of residential units in buildings table = %d" % du) - write("Residential vacancy rate = %.2f" % (1-0 - float(nhh)/du)) - - write("Number of residential units in units table = %d" - % len(residential_units)) - - rent_own = residential_units.tenure.value_counts() - write("Split of units by rent/own = %s" % str(rent_own)) - - rent_own = households.tenure[households.building_id == -1].value_counts() - write("Number of unplaced households by rent/own = %s" % str(rent_own)) - - du = buildings.deed_restricted_units.sum() - write("Number of deed restricted units = %d" % du) - - write("Base year mean income by whether household is in tra:\n%s" % base_year_measures["hhincome_by_intra"]) - write("Forecast year mean income by whether household is in tra:\n%s" % hhincome_by_intra) - write("Base year mean income by whether household is in hra/dr:\n%s" % base_year_measures["hhincome_by_insesit"]) - write("Forecast year mean income by whether household is in hra/dr:\n%s" % hhincome_by_insesit) - - jsp = buildings.job_spaces.sum() - write("Number of job spaces = %d" % jsp) - write("Non-residential vacancy rate = %.2f" % (1-0 - float(nj)/jsp)) - - tmp = base_year_measures["hh_by_subregion"] - write("Households base year share by subregion:\n%s" % norm_and_round(tmp)) - - write("Households share by subregion:\n%s" % norm_and_round(hh_by_subregion)) - diff = hh_by_subregion - base_year_measures["hh_by_subregion"] - - write("Households pct of regional growth by subregion:\n%s" % norm_and_round(diff)) - - tmp = base_year_measures["jobs_by_subregion"] - write("Jobs base year share by subregion:\n%s" % norm_and_round(tmp)) - - write("Jobs share by subregion:\n%s" % norm_and_round(jobs_by_subregion)) - diff = jobs_by_subregion - base_year_measures["jobs_by_subregion"] - - write("Jobs pct of regional growth by subregion:\n%s" % norm_and_round(diff)) - - tmp = base_year_measures["hh_by_inpda"] - write("Households base year share in pdas:\n%s" % norm_and_round(tmp)) - - write("Households share in pdas:\n%s" % norm_and_round(hh_by_inpda)) - - diff = hh_by_inpda - base_year_measures["hh_by_inpda"] - write("Households pct of regional growth in pdas:\n%s" % norm_and_round(diff)) - - tmp = 
base_year_measures["jobs_by_inpda"] - write("Jobs base year share in pdas:\n%s" % norm_and_round(tmp)) - - write("Jobs share in pdas:\n%s" % norm_and_round(jobs_by_inpda)) - - diff = jobs_by_inpda - base_year_measures["jobs_by_inpda"] - write("Jobs pct of regional growth in pdas:\n%s" % norm_and_round(diff)) - - tmp = base_year_measures["hh_by_intra"] - write("Households base year share in tras:\n%s" % norm_and_round(tmp)) - - write("Households share in tras:\n%s" % norm_and_round(hh_by_intra)) - - diff = hh_by_intra - base_year_measures["hh_by_intra"] - write("Households pct of regional growth in tras:\n%s" % norm_and_round(diff)) - - tmp = base_year_measures["jobs_by_intra"] - write("Jobs base year share in tras:\n%s" % norm_and_round(tmp)) - - write("Jobs share in tras:\n%s" % norm_and_round(jobs_by_intra)) - - diff = jobs_by_intra - base_year_measures["jobs_by_intra"] - write("Jobs pct of regional growth in tras:\n%s" % norm_and_round(diff)) - - tmp = base_year_measures["hh_by_insesit"] - write("Households base year share in hra/drs:\n%s" % norm_and_round(tmp)) - - write("Households share in hra/drs:\n%s" % norm_and_round(hh_by_insesit)) - - diff = hh_by_insesit - base_year_measures["hh_by_insesit"] - write("Households pct of regional growth in hra/drs:\n%s" % norm_and_round(diff)) - - if summary.parcel_output is not None: - df = summary.parcel_output - # we mark greenfield as a parcel with less than 500 current sqft - greenfield = df.total_sqft < 500 - - write("Current share of projects which are greenfield development:\n%s" - % norm_and_round(greenfield.value_counts())) - - write("Current share of units which are greenfield development:\n%s" % - norm_and_round(df.residential_units.groupby(greenfield).sum())) - - f.close() - - -@orca.step() -def diagnostic_output(households, buildings, parcels, taz, jobs, developer_settings, zones, year, summary, run_number, residential_units): - - households = households.to_frame() - buildings = buildings.to_frame() - parcels = parcels.to_frame() - zones = zones.to_frame() - - zones['zoned_du'] = parcels.groupby('zone_id').zoned_du.sum() - zones['zoned_du_underbuild'] = parcels.groupby('zone_id').zoned_du_underbuild.sum() - zones['zoned_du_underbuild_ratio'] = zones.zoned_du_underbuild / zones.zoned_du - - zones['residential_units'] = buildings.groupby('zone_id').residential_units.sum() - zones['job_spaces'] = buildings.groupby('zone_id').job_spaces.sum() - tothh = households.zone_id.value_counts().reindex(zones.index).fillna(0) - zones['residential_vacancy'] = 1.0 - tothh / zones.residential_units.replace(0, 1) - zones['non_residential_sqft'] = buildings.groupby('zone_id').non_residential_sqft.sum() - totjobs = jobs.zone_id.value_counts().reindex(zones.index).fillna(0) - zones['non_residential_vacancy'] = 1.0 - totjobs / zones.job_spaces.replace(0, 1) - - zones['retail_sqft'] = buildings.query('general_type == "Retail"').groupby('zone_id').non_residential_sqft.sum() - zones['office_sqft'] = buildings.query('general_type == "Office"').groupby('zone_id').non_residential_sqft.sum() - zones['industrial_sqft'] = buildings.query('general_type == "Industrial"').groupby('zone_id').non_residential_sqft.sum() - - zones['average_income'] = households.groupby('zone_id').income.quantile() - zones['household_size'] = households.groupby('zone_id').persons.quantile() - - zones['building_count'] = buildings.query('general_type == "Residential"').groupby('zone_id').size() - # this price is the max of the original unit vector belows - zones['residential_price'] = 
buildings.query('general_type == "Residential"').groupby('zone_id').residential_price.quantile() - # these two are the original unit prices averaged up to the building id - ru = residential_units - zones['unit_residential_price'] = ru.unit_residential_price.groupby(ru.zone_id).quantile() - zones['unit_residential_rent'] = ru.unit_residential_rent.groupby(ru.zone_id).quantile() - cap_rate = developer_settings.get('cap_rate') - # this compares price to rent and shows us where price is greater - # rents are monthly and a cap rate is applied in order to do the conversion - zones['unit_residential_price_>_rent'] = (zones.unit_residential_price > (zones.unit_residential_rent * 12 / cap_rate)).astype('int') - - zones['retail_rent'] = buildings[buildings.general_type == "Retail"].groupby('zone_id').non_residential_rent.quantile() - zones['office_rent'] = buildings[buildings.general_type == "Office"].groupby('zone_id').non_residential_rent.quantile() - zones['industrial_rent'] = buildings[buildings.general_type == "Industrial"].groupby('zone_id').non_residential_rent.quantile() - - zones['retail_sqft'] = buildings[buildings.general_type == "Retail"].groupby('zone_id').non_residential_sqft.sum() - - zones['retail_to_res_units_ratio'] = zones.retail_sqft / zones.residential_units.replace(0, 1) - - summary.add_zone_output(zones, "diagnostic_outputs", year) - - # save the dropped buildings to a csv - if "dropped_buildings" in orca.orca._TABLES: - df = orca.get_table("dropped_buildings").to_frame() - print("Dropped buildings", df.describe()) - df.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_dropped_buildings.csv").format(run_number)) - - -@orca.step() -def geographic_summary(parcels, households, jobs, buildings, run_setup, run_number, year, summary, final_year, travel_model_zones, - base_year, final_year): - # using the following conditional b/c `year` is used to pull a column - # from a csv based on a string of the year in add_population() - # and in add_employment() and 2019 is the - # 'base'/pre-simulation year, as is the 2020 value in the csv. 
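For reference, the price-to-rent test in diagnostic_output above annualizes monthly rent and capitalizes it with the cap rate before comparing against price; a toy worked example (all numbers illustrative; the model reads cap_rate from developer_settings):

    cap_rate = 0.05
    unit_price = 600_000.0   # modeled sale price
    unit_rent = 2_200.0      # modeled monthly rent

    rent_as_price = unit_rent * 12 / cap_rate        # 528,000.0
    price_gt_rent = int(unit_price > rent_as_price)  # 1 -> ownership outbids rental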
- if year == 2019: - year = base_year - base = True - else: - base = False - - households_df = orca.merge_tables('households', [parcels, buildings, households], - columns=['pda_id', 'zone_id', 'juris', 'superdistrict', - 'persons', 'income', 'base_income_quartile', - 'juris_tra', 'juris_sesit', 'juris_ppa']) - - jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs], - columns=['pda_id', 'superdistrict', 'juris', 'zone_id', - 'empsix', 'juris_tra', 'juris_sesit', 'juris_ppa']) - - buildings_df = orca.merge_tables('buildings', [parcels, buildings], - columns=['pda_id', 'superdistrict', 'juris', - 'building_type', 'zone_id', 'residential_units', - 'deed_restricted_units', 'preserved_units', - 'inclusionary_units', 'subsidized_units', - 'building_sqft', 'non_residential_sqft', - 'juris_tra', 'juris_sesit', 'juris_ppa']) - - parcel_output = summary.parcel_output - - # because merge_tables returns multiple zone_id_'s, but not the one we need - buildings_df = buildings_df.rename(columns={'zone_id_x': 'zone_id'}) - - geographies = ['superdistrict', 'juris'] - -# disable final blueprint summaries being handled in post processing summaries -# # append Draft/Final Blueprint strategy geographis -# geographies.extend(['pda_id', 'juris_tra', 'juris_sesit', 'juris_ppa']) - - if year in [base_year, 2025, 2030, 2035, 2040, 2045, final_year]: - - for geography in geographies: - - # create table with household/population summaries - - summary_table = pd.pivot_table(households_df, - values=['persons'], - index=[geography], - aggfunc=[np.size]) - - summary_table.columns = ['tothh'] - - # fill in 0 values where there are NA's so that summary table - # outputs are the same over the years otherwise a PDA or summary - # geography would be dropped if it had no employment or housing - if geography == 'superdistrict': - all_summary_geographies = buildings_df[geography].unique() - else: - all_summary_geographies = parcels[geography].unique() - summary_table = summary_table.reindex(all_summary_geographies).fillna(0) - - # turns out the lines above had to be moved up - if there are no - # households in a geography the index is missing that geography - # right off the bat. then when we try and add a jobs or buildings - # aggregation that HAS that geography, it doesn't get saved. ahh - # pandas, so powerful but so darn confusing. 
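A toy version of the pivot-then-reindex idiom described above, which keeps zero-household geographies in the output instead of dropping them (jurisdiction names are placeholders):

    import numpy as np
    import pandas as pd

    hh = pd.DataFrame({"juris": ["Oakland", "Oakland", "Berkeley"],
                       "persons": [2, 4, 1]})

    summary = pd.pivot_table(hh, values=['persons'], index=['juris'], aggfunc=[np.size])
    summary.columns = ['tothh']

    # reindex against the full geography list so Albany keeps a 0 row
    all_juris = ["Oakland", "Berkeley", "Albany"]
    summary = summary.reindex(all_juris).fillna(0)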
- - # income quartile counts - summary_table['hhincq1'] = households_df.query("base_income_quartile == 1").groupby(geography).size() - summary_table['hhincq2'] = households_df.query("base_income_quartile == 2").groupby(geography).size() - summary_table['hhincq3'] = households_df.query("base_income_quartile == 3").groupby(geography).size() - summary_table['hhincq4'] = households_df.query("base_income_quartile == 4").groupby(geography).size() - - # residential buildings by type - summary_table['res_units'] = buildings_df.groupby(geography).residential_units.sum() - summary_table['sfdu'] = buildings_df.query("building_type == 'HS' or building_type == 'HT'").\ - groupby(geography).residential_units.sum() - summary_table['mfdu'] = buildings_df.query("building_type == 'HM' or building_type == 'MR'").\ - groupby(geography).residential_units.sum() - - # employees by sector - summary_table['totemp'] = jobs_df.groupby(geography).size() - summary_table['agrempn'] = jobs_df.query("empsix == 'AGREMPN'").groupby(geography).size() - summary_table['mwtempn'] = jobs_df.query("empsix == 'MWTEMPN'").groupby(geography).size() - summary_table['retempn'] = jobs_df.query("empsix == 'RETEMPN'").groupby(geography).size() - summary_table['fpsempn'] = jobs_df.query("empsix == 'FPSEMPN'").groupby(geography).size() - summary_table['herempn'] = jobs_df.query("empsix == 'HEREMPN'").groupby(geography).size() - summary_table['othempn'] = jobs_df.query("empsix == 'OTHEMPN'").groupby(geography).size() - - # summary columns - summary_table['occupancy_rate'] = summary_table['tothh'] / (summary_table['sfdu'] + summary_table['mfdu']) - summary_table['non_residential_sqft'] = buildings_df.groupby(geography)['non_residential_sqft'].sum() - summary_table['sq_ft_per_employee'] = summary_table['non_residential_sqft'] / summary_table['totemp'] - - # columns re: affordable housing - summary_table['deed_restricted_units'] = buildings_df.groupby(geography).deed_restricted_units.sum() - summary_table['preserved_units'] = buildings_df.groupby(geography).preserved_units.sum() - summary_table['inclusionary_units'] = buildings_df.groupby(geography).inclusionary_units.sum() - summary_table['subsidized_units'] = buildings_df.groupby(geography).subsidized_units.sum() - - # additional columns from parcel_output - if parcel_output is not None: - - # columns re: affordable housing - summary_table['inclusionary_revenue_reduction'] = parcel_output.groupby(geography).policy_based_revenue_reduction.sum() - summary_table['inclusionary_revenue_reduction_per_unit'] = summary_table.inclusionary_revenue_reduction / \ - summary_table.inclusionary_units - summary_table['total_subsidy'] = parcel_output[parcel_output.subsidized_units > 0].\ - groupby(geography).max_profit.sum() * -1 - summary_table['subsidy_per_unit'] = summary_table.total_subsidy / summary_table.subsidized_units - - summary_table = summary_table.sort_index() - - if geography == 'superdistrict': - summary_table["superdistrict_name"] = travel_model_zones.sort_values(['superdistrict'])['superdistrict_name'].unique() - - if base is False: - summary_csv = os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_summaries_{}.csv").\ - format(run_number, geography, year) - elif base is True: - summary_csv = os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_summaries_{}.csv").\ - format(run_number, geography, 2009) - - summary_table.to_csv(summary_csv) - - # Write Summary of Accounts - if year == final_year and (run_setup["run_housing_bond_strategy"] or 
run_setup["run_office_bond_strategy"] - or run_setup['run_vmt_fee_strategy'] or run_setup['run_jobs_housing_fee_strategy']): - - for acct_name, acct in orca.get_injectable("coffer").items(): - fname = os.path.join(orca.get_injectable("outputs_dir"), "run{}_acctlog_{}_{}.csv").\ - format(run_number, acct_name, year) - acct.to_frame().to_csv(fname) - - if year == final_year: - baseyear = base_year - for geography in geographies: - df_base = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_summaries_{}.csv".\ - format(run_number, geography, baseyear))) - df_final = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_summaries_{}.csv".\ - format(run_number, geography, final_year))) - df_growth = nontaz_calculator(run_number, df_base, df_final) - df_growth.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_growth_summaries.csv".\ - format(run_number, geography)), index = False) - - # Write Urban Footprint Summary - if year in [base_year, 2025, 2030, 2035, 2040, 2045, final_year]: - # 02 15 2019 ET: Using perffoot there was no greenfield change - # between 2010 and 2050. Joined the parcels to Urbanized_Footprint - # instead, which improved the diff. The large majority of greenfield - # still occurs in 2010 (base year buildings outside of the - # urbanized area). - - buildings_uf_df = orca.merge_tables( - 'buildings', - [parcels, buildings], - columns=['urbanized', 'year_built', - 'acres', 'residential_units', - 'non_residential_sqft']) - - buildings_uf_df['count'] = 1 - - # residential units per acre in current year - s1 = buildings_uf_df['residential_units'] / buildings_uf_df['acres'] - # residential units per acre > 1 in current year - s2 = s1 > 1 - # urban footprint is 0 in base year (there's no development) - s3 = (buildings_uf_df['urbanized'] == 0) * 1 - # urban footprint is 0 in the base year - # AND residential units per acre > 1 in current year - buildings_uf_df['denser_greenfield'] = s3 * s2 - - # where buildings were built after the base year, - # sum whether urban footprint was 0 or 1 in the base year - df = buildings_uf_df.\ - loc[buildings_uf_df['year_built'] > 2010].\ - groupby('urbanized').sum() - df = df[['count', 'residential_units', 'non_residential_sqft', - 'acres']] - - # where buildings were built after the base year, - # sum if it was denser greenfield - df2 = buildings_uf_df.\ - loc[buildings_uf_df['year_built'] > 2010].\ - groupby('denser_greenfield').sum() - df2 = df2[['count', 'residential_units', 'non_residential_sqft', - 'acres']] - - formatters = {'count': '{:.0f}', - 'residential_units': '{:.0f}', - 'non_residential_sqft': '{:.0f}', - 'acres': '{:.0f}'} - - df = format_df(df, formatters) - - df2 = format_df(df2, formatters) - - df = df.transpose() - - df2 = df2.transpose() - - df[2] = df2[1] - - df.columns = ['urban_footprint_0', 'urban_footprint_1', - 'denser_greenfield'] - uf_summary_csv = os.path.join(orca.get_injectable("outputs_dir"), "run{}_urban_footprint_summary_{}.csv".\ - format(run_number, year)) - df.to_csv(uf_summary_csv) - - -@orca.step() -def building_summary(parcels, run_number, year, - buildings, - initial_year, final_year): - - if year not in [initial_year, 2015, final_year]: - return - - df = orca.merge_tables( - 'buildings', - [parcels, buildings], - columns=['performance_zone', 'year_built', 'building_type', - 'residential_units', 'unit_price', 'zone_id', - 'non_residential_sqft', 'vacant_res_units', - 'deed_restricted_units', 'inclusionary_units', - 'preserved_units', 'subsidized_units', 
'job_spaces',
-                             'x', 'y', 'geom_id', 'source'])
-
-    df.to_csv(
-        os.path.join(orca.get_injectable("outputs_dir"), "run%d_building_data_%d.csv" %
-                     (run_number, year))
-    )
-
-
-@orca.step()
-def parcel_summary(parcels, buildings, households, jobs, run_number, year, parcels_zoning_calculations,
-                   initial_year, final_year, parcels_geography, base_year):
-
-    df = parcels.to_frame([
-        "geom_id",
-        "x", "y",
-        "total_job_spaces",
-        "first_building_type"
-    ])
-
-    df2 = parcels_zoning_calculations.to_frame([
-        "zoned_du",
-        "zoned_du_underbuild"
-    ])
-
-    df = df.join(df2)
-
-    # bring in the zoning modifications growth geography tag
-    join_col = 'zoningmodcat'
-
-    if join_col in parcels_geography.to_frame().columns:
-        parcel_gg = parcels_geography.to_frame(["parcel_id", join_col, "juris"])
-        df = df.merge(parcel_gg, on='parcel_id', how='left')
-
-    households_df = orca.merge_tables('households', [buildings, households], columns=['parcel_id', 'base_income_quartile'])
-
-    # add households by quartile on each parcel
-    for i in range(1, 5):
-        df['hhq%d' % i] = households_df[households_df.base_income_quartile == i].parcel_id.value_counts()
-    df["tothh"] = households_df.groupby('parcel_id').size()
-
-    building_df = orca.merge_tables('buildings', [parcels, buildings],
-                                    columns=['parcel_id', 'residential_units', 'deed_restricted_units',
-                                             'preserved_units', 'inclusionary_units', 'subsidized_units'])
-    df['residential_units'] = building_df.groupby('parcel_id')['residential_units'].sum()
-    df['deed_restricted_units'] = building_df.groupby('parcel_id')['deed_restricted_units'].sum()
-    df['preserved_units'] = building_df.groupby('parcel_id')['preserved_units'].sum()
-    df['inclusionary_units'] = building_df.groupby('parcel_id')['inclusionary_units'].sum()
-    df['subsidized_units'] = building_df.groupby('parcel_id')['subsidized_units'].sum()
-
-    jobs_df = orca.merge_tables('jobs', [buildings, jobs], columns=['parcel_id', 'empsix'])
-
-    # add jobs by empsix category on each parcel
-    for cat in jobs_df.empsix.unique():
-        df[cat] = jobs_df[jobs_df.empsix == cat].parcel_id.value_counts()
-    df["totemp"] = jobs_df.groupby('parcel_id').size()
-
-    df.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % (run_number, year)))
-
-    if year != base_year:
-        print('calculate diff for year {}'.format(year))
-        # do the diff against the initial year
-
-        df2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" %
-                          (run_number, initial_year)), index_col="parcel_id")
-
-        for col in df.columns:
-
-            if col in ["x", "y", "first_building_type", "juris", join_col]:
-                continue
-
-            # fill na with 0 for parcels with no building in either the base year or the current year
-            df[col].fillna(0, inplace=True)
-            df2[col].fillna(0, inplace=True)
-
-            df[col] = df[col] - df2[col]
-
-        df.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_diff.csv" % run_number))
-
-    if year != base_year:
-
-        print('calculate growth summaries for year {}'.format(year))
-        df_base = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % (run_number, base_year)))
-        df_final = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "run%d_parcel_data_%d.csv" % (run_number, year)))
-
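The base-versus-final pattern above (write the per-year parcel CSV, then re-read both years, zero-fill, and subtract) recurs throughout these summary steps. A minimal standalone sketch of that diff logic, assuming parcel_id-indexed CSVs of numeric columns; the helper name and file names here are illustrative, not the model's actual outputs:

    import pandas as pd

    def parcel_growth(base_csv, final_csv, skip_cols=("x", "y", "first_building_type", "juris")):
        # parcels with no buildings in one of the two years come through as NaN;
        # zero-fill before subtracting so those parcels register as growth
        # instead of being dropped
        df_base = pd.read_csv(base_csv, index_col="parcel_id")
        df_final = pd.read_csv(final_csv, index_col="parcel_id")
        cols = [c for c in df_final.columns if c in df_base.columns and c not in skip_cols]
        return df_final[cols].fillna(0).sub(df_base[cols].fillna(0), fill_value=0)

    # usage, with illustrative file names:
    # growth = parcel_growth("run1_parcel_data_2010.csv", "run1_parcel_data_2050.csv")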
-    geographies = ['GG', 'tra', 'HRA', 'DIS']
-
-    for geography in geographies:
-        df_growth = GEO_SUMMARY_LOADER(run_number, geography, df_base, df_final)
-        df_growth['county'] = df_growth['juris'].map(juris_to_county)
-        df_growth.sort_values(by=['county', 'juris', 'geo_category'], ascending=[True, True, False], inplace=True)
-        df_growth.set_index(['RUNID', 'county', 'juris', 'geo_category'], inplace=True)
-        df_growth.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_growth_summaries.csv".\
-                         format(run_number, geography)))
-
-    geo_1, geo_2, geo_3 = 'tra', 'DIS', 'HRA'
-
-    df_growth_1 = TWO_GEO_SUMMARY_LOADER(run_number, geo_1, geo_2, df_base, df_final)
-    df_growth_1['county'] = df_growth_1['juris'].map(juris_to_county)
-    df_growth_1.sort_values(by=['county', 'juris', 'geo_category'], ascending=[True, True, False], inplace=True)
-    df_growth_1.set_index(['RUNID', 'county', 'juris', 'geo_category'], inplace=True)
-    df_growth_1.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_growth_summaries.csv".\
-                       format(run_number, geo_1 + geo_2)))
-
-    df_growth_2 = TWO_GEO_SUMMARY_LOADER(run_number, geo_1, geo_3, df_base, df_final)
-    df_growth_2['county'] = df_growth_2['juris'].map(juris_to_county)
-    df_growth_2.sort_values(by=['county', 'juris', 'geo_category'], ascending=[True, True, False], inplace=True)
-    df_growth_2.set_index(['RUNID', 'county', 'juris', 'geo_category'], inplace=True)
-    df_growth_2.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "run{}_{}_growth_summaries.csv".\
-                       format(run_number, geo_1 + geo_3)))
-
-
-@orca.step()
-def travel_model_output(parcels, households, jobs, buildings, year, summary, final_year,
-                        tm1_taz1_forecast_inputs, run_number, base_year_summary_taz, travel_model_zones,
-                        tm1_tm2_maz_forecast_inputs, tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls):
-
-    parcels = parcels.to_frame().merge(travel_model_zones, on='parcel_id')
-
-    households_df = orca.merge_tables('households',
-                                      [parcels, buildings, households],
-                                      columns=['taz_tm1',
-                                               'base_income_quartile',
-                                               'income', 'persons',
-                                               'maz_id'])
-
-    taz_tm1_df = pd.DataFrame(index=travel_model_zones.sort_values(['taz_tm1'])['taz_tm1'].unique())
-
-    taz_tm1_df["zone"] = taz_tm1_df.index
-    taz_tm1_df["sd"] = parcels.sort_values(['taz_tm1']).groupby(['taz_tm1'])['superdistrict'].first()
-    taz_tm1_df["county"] = parcels.sort_values(['taz_tm1']).groupby(['taz_tm1'])['county'].first()
-
-    jobs_df = orca.merge_tables('jobs',
-                                [parcels, buildings, jobs],
-                                columns=['taz_tm1', 'empsix'])
-
-    def getsectorcounts(sector):
-        return jobs_df.query("empsix == '%s'" % sector).groupby('taz_tm1').size()
-
-    taz_tm1_df["agrempn"] = getsectorcounts("AGREMPN")
-    taz_tm1_df["fpsempn"] = getsectorcounts("FPSEMPN")
-    taz_tm1_df["herempn"] = getsectorcounts("HEREMPN")
-    taz_tm1_df["retempn"] = getsectorcounts("RETEMPN")
-    taz_tm1_df["mwtempn"] = getsectorcounts("MWTEMPN")
-    taz_tm1_df["othempn"] = getsectorcounts("OTHEMPN")
-    taz_tm1_df["totemp"] = jobs_df.groupby('taz_tm1').size()
-
-    def gethhcounts(filter):
-        return households_df.query(filter).groupby('taz_tm1').size()
-
-    taz_tm1_df["hhincq1"] = gethhcounts("base_income_quartile == 1")
-    taz_tm1_df["hhincq2"] = gethhcounts("base_income_quartile == 2")
-    taz_tm1_df["hhincq3"] = gethhcounts("base_income_quartile == 3")
-    taz_tm1_df["hhincq4"] = gethhcounts("base_income_quartile == 4")
-    taz_tm1_df["hhpop"] = households_df.groupby('taz_tm1').persons.sum()
-    taz_tm1_df["tothh"] = households_df.groupby('taz_tm1').size()
-
-    zone_forecast_inputs = tm1_taz1_forecast_inputs.to_frame()
-    zone_forecast_inputs.index = zone_forecast_inputs.zone_id
-    taz_tm1_df["shpop62p"] = zone_forecast_inputs.sh_62plus
-    taz_tm1_df["gqpop"] = zone_forecast_inputs["gqpop" + str(year)[-2:]].fillna(0)
-
-    taz_tm1_df["totacre"] = zone_forecast_inputs.totacre_abag
-    # total population = group quarters plus household population
-    taz_tm1_df["totpop"] = (taz_tm1_df.hhpop + taz_tm1_df.gqpop).fillna(0)
-
-    buildings_df = buildings.to_frame(['taz_tm1',
-                                       'building_type',
-                                       'residential_units',
-                                       'building_sqft',
-                                       'lot_size_per_unit'])
-
-    taz_tm1_df["res_units"] = buildings_df.groupby('taz_tm1').residential_units.sum()
-    taz_tm1_df["mfdu"] = buildings_df.query("building_type == 'HM' or building_type == 'MR'").\
-        groupby('taz_tm1').residential_units.sum()
-    taz_tm1_df["sfdu"] = buildings_df.query("building_type == 'HS' or building_type == 'HT'").\
-        groupby('taz_tm1').residential_units.sum()
-
-    f = orca.get_injectable('parcel_first_building_type_is')
-
-    def count_acres_with_mask(mask):
-        mask *= parcels.acres
-        return mask.groupby(parcels.taz_tm1).sum()
-
-    taz_tm1_df["resacre_unweighted"] = count_acres_with_mask(f('residential') | f('mixedresidential'))
-    taz_tm1_df["ciacre_unweighted"] = count_acres_with_mask(f('select_non_residential'))
-    taz_tm1_df["ciacre"] = scaled_ciacre(base_year_summary_taz.CIACRE_UNWEIGHTED, taz_tm1_df.ciacre_unweighted)
-    taz_tm1_df["resacre"] = scaled_resacre(base_year_summary_taz.RESACRE_UNWEIGHTED, taz_tm1_df.resacre_unweighted)
-
-    rc = tm1_tm2_regional_controls.to_frame()
-    taz_tm1_df = add_population(taz_tm1_df, year, rc)
-    taz_tm1_df.totpop = taz_tm1_df.hhpop + taz_tm1_df.gqpop
-    taz_tm1_df = add_employment(taz_tm1_df, year, rc)
-    taz_tm1_df["density_pop"] = taz_tm1_df.totpop / taz_tm1_df.totacre
-    taz_tm1_df["density_pop"] = taz_tm1_df["density_pop"].fillna(0)
-    taz_tm1_df["density_emp"] = (2.5 * taz_tm1_df.totemp) / taz_tm1_df.totacre
-    taz_tm1_df["density_emp"] = taz_tm1_df["density_emp"].fillna(0)
-    taz_tm1_df["density"] = taz_tm1_df["density_pop"] + taz_tm1_df["density_emp"]
-    taz_tm1_df["areatype"] = pd.cut(taz_tm1_df.density,
-                                    bins=[0, 6, 30, 55, 100, 300, np.inf],
-                                    labels=[5, 4, 3, 2, 1, 0])
-    taz_tm1_df = add_age_categories(taz_tm1_df, year, rc)
-    orca.add_table('taz_summary_1', taz_tm1_df)
-
-    summary.add_zone_output(taz_tm1_df, "travel_model_output", year)
-    summary.write_zone_output()
-
-    summary.write_parcel_output(add_xy={
-        "xy_table": "parcels",
-        "foreign_key": "parcel_id",
-        "x_col": "x",
-        "y_col": "y"
-    })
-
-    # uppercase columns to match the travel model template
-    taz_tm1_df.columns = [x.upper() for x in taz_tm1_df.columns]
-
-    maz_tm2_df = pd.DataFrame(index=travel_model_zones.maz_tm2.unique())
-
-    mazi = tm1_tm2_maz_forecast_inputs.to_frame()
-    mazi_yr = str(year)[2:]
-
-    maz_tm2_df["hhpop"] = households_df.groupby('maz_id').persons.sum()
-    maz_tm2_df["tothh"] = households_df.groupby('maz_id').size()
-
-    tothh = taz_tm1_df.TOTHH.sum()
-    maz_tm2_df = add_households(maz_tm2_df, tothh)
-
-    maz_tm2_df['gq_type_univ'] = mazi['gqpopu' + mazi_yr]
-    maz_tm2_df['gq_type_mil'] = mazi['gqpopm' + mazi_yr]
-    maz_tm2_df['gq_type_othnon'] = mazi['gqpopo' + mazi_yr]
-    maz_tm2_df['gq_tot_pop'] = maz_tm2_df['gq_type_univ'] + \
maz_tm2_df['gq_type_mil'] + maz_tm2_df['gq_type_othnon'] - tot_gqpop = maz_tm2_df.gq_tot_pop.sum() - - rdf = tm1_tm2_regional_demographic_forecast.to_frame() - tfi = tm1_taz1_forecast_inputs.to_frame() - tfi.index = tfi.TAZ1454 - - taz_tm1_df['gq_type_univ'] = maz.groupby('taz1454').gq_type_univ.sum().fillna(0) - taz_tm1_df['gq_type_mil'] = maz.groupby('taz1454').gq_type_mil.sum().fillna(0) - taz_tm1_df['gq_type_othnon'] = maz.groupby('taz1454').gq_type_othnon.sum().fillna(0) - taz_tm1_df['gq_tot_pop'] = maz.groupby('taz1454').gq_tot_pop.sum().fillna(0) - - taz_tm1_df['hh'] = taz_tm1_df.tothh - taz_tm1_df['hh_size_1'] = taz_tm1_df['tothh'] * tfi.shrs1_2010 - taz_tm1_df['hh_size_2'] = taz_tm1_df['tothh'] * tfi.shrs2_2010 - taz_tm1_df['hh_size_3'] = taz_tm1_df['tothh'] * tfi.shrs3_2010 - taz_tm1_df['hh_size_4_plus'] = taz_tm1_df['tothh'] * tfi.shrs4_2010 - - taz_tm1_df['county'] = maz.groupby('taz1454').COUNTY.first() - taz_tm1_df['county_name'] = maz.groupby('taz1454').county_name.first() - - taz_tm1_df['hh_wrks_0'] = taz_tm1_df['tothh'] * tfi.shrw0_2010 - taz_tm1_df['hh_wrks_1'] = taz_tm1_df['tothh'] * tfi.shrw1_2010 - taz_tm1_df['hh_wrks_2'] = taz_tm1_df['tothh'] * tfi.shrw2_2010 - taz_tm1_df['hh_wrks_3_plus'] = taz_tm1_df['tothh'] * tfi.shrw3_2010 - - taz_tm1_df['hh_kids_no'] = taz_df['tothh'] * tfi.shrn_2010 - taz_tm1_df['hh_kids_yes'] = taz_df['tothh'] * tfi.shry_2010 - taz_tm1_df = adjust_hhsize(taz_df, year, rdf, tothh) - taz_tm1_df = adjust_hhwkrs(taz_df, year, rdf, tothh) - taz_tm1_df = adjust_hhkids(taz_df, year, rdf, tothh) - del taz_tm1_df['hh'] - - taz_df.index.name = 'TAZ' - - taz_df.fillna(0).to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run{}_taz_summaries_{}.csv").format(run_number, year)) - - # aggregate TAZ summaries to create county summaries - - county_df = pd.DataFrame(index=['San Francisco', 'San Mateo', 'Santa Clara', 'Alameda', - 'Contra Costa', 'Solano', 'Napa', 'Sonoma','Marin']) - county_df["COUNTY_NAME"] = county_df.index - - taz_cols = ["AGREMPN", "FPSEMPN", "HEREMPN", "RETEMPN", "MWTEMPN", - "OTHEMPN", "TOTEMP", "HHINCQ1", "HHINCQ2", "HHINCQ3", - "HHINCQ4", "HHPOP", "TOTHH", "SHPOP62P", "GQPOP", - "TOTACRE", "TOTPOP", "RES_UNITS", "MFDU", "SFDU", - "RESACRE_UNWEIGHTED", "CIACRE_UNWEIGHTED", "EMPRES", - "AGE0004", "AGE0519", "AGE2044", "AGE4564", "AGE65P"] - - for col in taz_cols: - taz_df_grouped = taz_df.groupby('county_name').sum() - county_df[col] = taz_df_grouped[col] - - county_df["DENSITY"] = (county_df.TOTPOP + (2.5 * county_df.TOTEMP)) / county_df.TOTACRE - - county_df["AREATYPE"] = pd.cut( - county_df.DENSITY, - bins=[0, 6, 30, 55, 100, 300, np.inf], - labels=[5, 4, 3, 2, 1, 0]) - - base_year_summary_taz = base_year_summary_taz.to_frame() - base_year_summary_county = base_year_summary_taz.groupby('COUNTY_NAME').sum() - base_year_summary_county_ciacre = base_year_summary_county['CIACRE_UNWEIGHTED'] - base_year_summary_county_resacre = base_year_summary_county['RESACRE_UNWEIGHTED'] - - county_df["CIACRE"] = scaled_ciacre(base_year_summary_county_ciacre, county_df.CIACRE_UNWEIGHTED) - county_df["RESACRE"] = scaled_resacre(base_year_summary_county_resacre, county_df.RESACRE_UNWEIGHTED) - - county_df = county_df[["COUNTY_NAME", "AGREMPN", "FPSEMPN", "HEREMPN", - "RETEMPN", "MWTEMPN", "OTHEMPN", "TOTEMP", - "HHINCQ1", "HHINCQ2", "HHINCQ3", "HHINCQ4", - "HHPOP", "TOTHH", "SHPOP62P", "GQPOP", - "TOTACRE", "TOTPOP", "DENSITY", "AREATYPE", - "RES_UNITS", "MFDU", "SFDU", "RESACRE_UNWEIGHTED", - "CIACRE_UNWEIGHTED", "CIACRE", "RESACRE", "EMPRES", - 
"AGE0004", "AGE0519", "AGE2044", "AGE4564", - "AGE65P"]] - county_df = county_df.set_index('COUNTY_NAME') - - county_df.fillna(0).to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run{}_county_summaries_{}.csv").format(run_number, year)) - - if year == final_year: - baseyear = base_year - df_base = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_taz_summaries_%d.csv" % (run_number, baseyear))) - df_final = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_taz_summaries_%d.csv" % (run_number, final_year))) - df_growth = taz_calculator(run_number, - df_base, df_final) - df_growth = df_growth.set_index(['RUNID', 'TAZ','SD', - 'SD_NAME','COUNTY','CNTY_NAME']) - df_growth.to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_taz_growth_summaries.csv" % run_number)) - df_growth_c = county_calculator(run_number, - df_base, df_final) - df_growth_c.to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_county_growth_summaries.csv" % run_number),index = False) - # add region marginals - pd.DataFrame(data={'REGION': [1], 'gq_num_hh_region': [tot_gqpop]}).to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run{}_regional_marginals_{}.csv").format(run_number, year), - index=False) - - -@orca.step() -def travel_model_2_output(parcels, households, jobs, buildings, maz, year, tm2_emp27_employment_shares, run_number, - tm1_tm2_maz_forecast_inputs, tm2_taz2_forecast_inputs, tm2_occupation_shares, - tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls, base_year, final_year): - - if year not in [base_year, 2025, 2030, 2035, 2040, 2045, final_year]: - # only summarize for years which are multiples of 5 - return - - maz = maz.to_frame(['TAZ', 'COUNTY', 'county_name', 'taz1454']) - rc = tm1_tm2_regional_controls.to_frame() - - pcl = parcels.to_frame(['maz_id', 'acres']) - maz['ACRES'] = pcl.groupby('maz_id').acres.sum() - - hh_df = orca.merge_tables('households', - [parcels, buildings, households], - columns=['zone_id', - 'base_income_quartile', - 'income', - 'persons', - 'maz_id']) - - hh_df.maz_id = hh_df.maz_id.fillna(213906) - - def gethhcounts(filter): - return hh_df.query(filter).groupby('maz_id').size() - - tothh = len(hh_df) - maz["hhincq1"] = gethhcounts("base_income_quartile == 1") - maz["hhincq2"] = gethhcounts("base_income_quartile == 2") - maz["hhincq3"] = gethhcounts("base_income_quartile == 3") - maz["hhincq4"] = gethhcounts("base_income_quartile == 4") - maz["hhpop"] = hh_df.groupby('maz_id').persons.sum() - maz["tothh"] = hh_df.groupby('maz_id').size() - maz = add_households(maz, tothh) - - jobs_df = orca.merge_tables('jobs', - [parcels, buildings, jobs], - columns=['maz_id', 'empsix']) - - bldg_df = orca.merge_tables('buildings', - [buildings, parcels], - columns=['maz_id', 'residential_units']) - - tm2_emp27_employment_shares = tm2_emp27_employment_shares.to_frame() - - def getsectorcounts(empsix, empsh): - emp = jobs_df.query("empsix == '%s'" % empsix).\ - groupby('maz_id').size() - return emp * tm2_emp27_employment_shares.loc[tm2_emp27_employment_shares.empsh == empsh, - str(year)].values[0] - - maz["ag"] = getsectorcounts("AGREMPN", "ag") - maz["natres"] = getsectorcounts("AGREMPN", "natres") - - maz["fire"] = getsectorcounts("FPSEMPN", "fire") - maz["serv_bus"] = getsectorcounts("FPSEMPN", "serv_bus") - maz["prof"] = getsectorcounts("FPSEMPN", "prof") - maz["lease"] = getsectorcounts("FPSEMPN", "lease") - - maz["art_rec"] = getsectorcounts("HEREMPN", "art_rec") - maz["serv_soc"] = 
getsectorcounts("HEREMPN", "serv_soc") - maz["serv_per"] = getsectorcounts("HEREMPN", "serv_per") - maz["ed_high"] = getsectorcounts("HEREMPN", "ed_high") - maz["ed_k12"] = getsectorcounts("HEREMPN", "ed_k12") - maz["ed_oth"] = getsectorcounts("HEREMPN", "ed_oth") - maz["health"] = getsectorcounts("HEREMPN", "health") - - maz["man_tech"] = getsectorcounts("MWTEMPN", "man_tech") - maz["man_lgt"] = getsectorcounts("MWTEMPN", "man_lgt") - maz["logis"] = getsectorcounts("MWTEMPN", "logis") - maz["man_bio"] = getsectorcounts("MWTEMPN", "man_bio") - maz["transp"] = getsectorcounts("MWTEMPN", "transp") - maz["man_hvy"] = getsectorcounts("MWTEMPN", "man_hvy") - maz["util"] = getsectorcounts("MWTEMPN", "util") - - maz["info"] = getsectorcounts("OTHEMPN", "info") - maz["gov"] = getsectorcounts("OTHEMPN", "gov") - maz["constr"] = getsectorcounts("OTHEMPN", "constr") - - maz["hotel"] = getsectorcounts("RETEMPN", "hotel") - maz["ret_loc"] = getsectorcounts("RETEMPN", "ret_loc") - maz["ret_reg"] = getsectorcounts("RETEMPN", "ret_reg") - maz["eat"] = getsectorcounts("RETEMPN", "eat") - - maz["emp_total"] = jobs_df.groupby('maz_id').size() - - maz = maz.fillna(0) - - emp_cols = ['ag', 'natres', 'logis', 'man_bio', 'man_hvy', 'man_lgt', - 'man_tech', 'transp', 'util', 'eat', 'hotel', 'ret_loc', - 'ret_reg', 'fire', 'lease', 'prof', 'serv_bus', 'art_rec', - 'ed_high', 'ed_k12', 'ed_oth', 'health', 'serv_per', - 'serv_soc', 'constr', 'info', 'gov'] - for i, r in maz.iterrows(): - c = r[emp_cols] - maz.loc[i, emp_cols] = round_series_match_target(r[emp_cols], - r.emp_total, 0) - - maz['num_hh'] = maz['tothh'] - - mazi = tm1_tm2_maz_forecast_inputs.to_frame() - mazi_yr = str(year)[2:] - maz['gq_type_univ'] = mazi['gqpopu' + mazi_yr] - maz['gq_type_mil'] = mazi['gqpopm' + mazi_yr] - maz['gq_type_othnon'] = mazi['gqpopo' + mazi_yr] - maz['gq_tot_pop'] = maz['gq_type_univ'] + maz['gq_type_mil']\ - + maz['gq_type_othnon'] - maz['gqpop'] = maz['gq_tot_pop'] - maz = add_population_tm2(maz, year) - - maz['POP'] = maz.gq_tot_pop + maz.hhpop - maz['HH'] = maz.tothh.fillna(0) - - maz['RetEmp'] = maz.hotel + maz.ret_loc + maz.ret_reg + maz.eat - maz['ACRES'] = pcl.groupby('maz_id').acres.sum() - maz['residential_units'] = bldg_df.groupby('maz_id').\ - residential_units.sum() - maz['DUDen'] = maz.residential_units / maz.ACRES - maz['EmpDen'] = maz.emp_total / maz.ACRES - maz['RetEmpDen'] = maz.RetEmp / maz.ACRES - maz['PopDen'] = maz.POP / maz.ACRES - - maz['hh_size_1'] = maz.tothh.fillna(0) * mazi.shrs1_2010 - maz['hh_size_2'] = maz.tothh.fillna(0) * mazi.shrs2_2010 - maz['hh_size_3'] = maz.tothh.fillna(0) * mazi.shrs3_2010 - maz['hh_size_4_plus'] = maz.tothh.fillna(0) * mazi.shs4_2010 - rdf = tm1_tm2_regional_demographic_forecast.to_frame() - maz = adjust_hhsize(maz, year, rdf, tothh) - - taz2 = pd.DataFrame(index=tm2_taz2_forecast_inputs.index) - t2fi = tm2_taz2_forecast_inputs.to_frame() - taz2['hh'] = maz.groupby('TAZ').tothh.sum() - taz2['hh_inc_30'] = maz.groupby('TAZ').hhincq1.sum().fillna(0) - taz2['hh_inc_30_60'] = maz.groupby('TAZ').hhincq2.sum().fillna(0) - taz2['hh_inc_60_100'] = maz.groupby('TAZ').hhincq3.sum().fillna(0) - taz2['hh_inc_100_plus'] = maz.groupby('TAZ').hhincq4.sum().fillna(0) - - taz2['pop_hhsize1'] = maz.groupby('TAZ').hh_size_1.sum() - taz2['pop_hhsize2'] = maz.groupby('TAZ').hh_size_2.sum() * 2 - taz2['pop_hhsize3'] = maz.groupby('TAZ').hh_size_3.sum() * 3 - taz2['pop_hhsize4'] = maz.groupby('TAZ').hh_size_4_plus.sum() * 4.781329 - - taz2['pop'] = taz2.pop_hhsize1 + taz2.pop_hhsize2\ - + 
taz2.pop_hhsize3 + taz2.pop_hhsize4 - - taz2['hhpop'] = maz.groupby('TAZ').hhpop.sum() - taz2['county_name'] = maz.groupby('TAZ').county_name.first() - - taz2['pers_age_00_19'] = taz2['hhpop'] * t2fi.shra1_2010 - taz2['pers_age_20_34'] = taz2['hhpop'] * t2fi.shra2_2010 - taz2['pers_age_35_64'] = taz2['hhpop'] * t2fi.shra3_2010 - taz2['pers_age_65_plus'] = taz2['hhpop'] * t2fi.shra4_2010 - - taz2['hh_wrks_0'] = taz2['hh'] * t2fi.shrw0_2010 - taz2['hh_wrks_1'] = taz2['hh'] * t2fi.shrw1_2010 - taz2['hh_wrks_2'] = taz2['hh'] * t2fi.shrw2_2010 - taz2['hh_wrks_3_plus'] = taz2['hh'] * t2fi.shrw3_2010 - - taz2['hh_kids_no'] = taz2['hh'] * t2fi.shrn_2010 - taz2['hh_kids_yes'] = taz2['hh'] * t2fi.shry_2010 - taz2 = adjust_hhwkrs(taz2, year, rdf, tothh) - taz2 = adjust_page(taz2, year) - taz2 = adjust_hhkids(taz2, year, rdf, tothh) - taz2.index.name = 'TAZ2' - - county = pd.DataFrame(index=[[4, 5, 9, 7, 1, 2, 3, 6, 8]]) - - county['pop'] = maz.groupby('county_name').POP.sum() - - county[['hh_wrks_1', 'hh_wrks_2', 'hh_wrks_3_plus']] =\ - taz2.groupby('county_name').agg({'hh_wrks_1': 'sum', - 'hh_wrks_2': 'sum', - 'hh_wrks_3_plus': 'sum'}) - - county['workers'] = county.hh_wrks_1 + county.hh_wrks_2 * 2\ - + county.hh_wrks_3_plus * 3.474036 - - cef = tm2_occupation_shares.to_frame() - cef = cef.loc[cef.year == year].set_index('county_name') - county['pers_occ_management'] = county.workers * cef.shr_occ_management - county['pers_occ_management'] = round_series_match_target( - county['pers_occ_management'], np.round( - county['pers_occ_management'].sum()), 0) - county['pers_occ_professional'] = county.workers *\ - cef.shr_occ_professional - county['pers_occ_professional'] = round_series_match_target( - county['pers_occ_professional'], np.round( - county['pers_occ_professional'].sum()), 0) - county['pers_occ_services'] = county.workers * cef.shr_occ_services - county['pers_occ_services'] = round_series_match_target( - county['pers_occ_services'], np.round( - county['pers_occ_services'].sum()), 0) - county['pers_occ_retail'] = county.workers * cef.shr_occ_retail - county['pers_occ_retail'] = round_series_match_target( - county['pers_occ_retail'], np.round( - county['pers_occ_retail'].sum()), 0) - county['pers_occ_manual'] = county.workers * cef.shr_occ_manual - county['pers_occ_manual'] = round_series_match_target( - county['pers_occ_manual'], np.round( - county['pers_occ_manual'].sum()), 0) - county['pers_occ_military'] = county.workers * cef.shr_occ_military - county['pers_occ_military'] = round_series_match_target( - county['pers_occ_military'], np.round( - county['pers_occ_military'].sum()), 0) - - county['gq_tot_pop'] = maz.groupby('county_name').gq_tot_pop.sum() - - maz[['HH', 'POP', 'emp_total', 'ag', 'natres', 'logis', - 'man_bio', 'man_hvy', 'man_lgt', 'man_tech', - 'transp', 'util', 'eat', 'hotel', - 'ret_loc', 'ret_reg', 'fire', 'lease', - 'prof', 'serv_bus', 'art_rec', 'ed_high', - 'ed_k12', 'ed_oth', 'health', 'serv_per', - 'serv_soc', 'constr', 'info', 'gov', - 'DUDen', 'EmpDen', 'PopDen', 'RetEmpDen']].fillna(0).to_csv( - os.path.join(orca.get_injectable("outputs_dir"), "run{}_maz_summaries_{}.csv".format(run_number, year))) - - maz[['num_hh', 'hh_size_1', 'hh_size_2', - 'hh_size_3', 'hh_size_4_plus', 'gq_tot_pop', - 'gq_type_univ', 'gq_type_mil', 'gq_type_othnon']].fillna(0).to_csv( - os.path.join(orca.get_injectable("outputs_dir"), "run{}_maz_marginals_{}.csv".format(run_number, year))) - - taz2[['hh_inc_30', 'hh_inc_30_60', - 'hh_inc_60_100', 'hh_inc_100_plus', - 'hh_wrks_0', 'hh_wrks_1', 
'hh_wrks_2', - 'hh_wrks_3_plus', 'pers_age_00_19', - 'pers_age_20_34', 'pers_age_35_64', - 'pers_age_65_plus', 'hh_kids_no', - 'hh_kids_yes']].fillna(0).to_csv( - os.path.join(orca.get_injectable("outputs_dir"), "run{}_taz2_marginals_{}.csv".format(run_number, year))) - - county[['pers_occ_management', 'pers_occ_professional', - 'pers_occ_services', 'pers_occ_retail', - 'pers_occ_manual', 'pers_occ_military', - 'gq_tot_pop']].fillna(0).to_csv( - os.path.join(orca.get_injectable("outputs_dir"), "run{}_county_marginals_{}.csv".format(run_number, year))) - - -def scaled_ciacre(mtcc, us_outc): - zfi = zone_forecast_inputs() - abgc = zfi.ciacre10_abag - sim_difference = [us_outc - mtcc][0] - sim_difference[sim_difference < 0] = 0 - combined_acres = abgc + sim_difference - return combined_acres - - -def scaled_resacre(mtcr, us_outr): - zfi = zone_forecast_inputs() - abgr = zfi.resacre10_abag - sim_difference = [us_outr - mtcr][0] - sim_difference[sim_difference < 0] = 0 - combined_acres = abgr + sim_difference - return combined_acres - - -def zone_forecast_inputs(): - return pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), 'zone_forecasts/tm1_taz1_forecast_inputs.csv'), - index_col="zone_id") - - -def add_population(df, year, tm1_tm2_regional_controls): - rc = tm1_tm2_regional_controls - target = rc.totpop.loc[year] - df.gqpop.sum() - zfi = zone_forecast_inputs() - s = df.tothh * zfi.meanhhsize - - s = scale_by_target(s, target) # , .15 - - df["hhpop"] = round_series_match_target(s, target, 0) - df["hhpop"] = df.hhpop.fillna(0) - return df - - -def add_population_tm2(df, year, tm1_tm2_regional_controls): - rc = tm1_tm2_regional_controls - target = rc.totpop.loc[year] - df.gqpop.sum() - s = df.hhpop - s = scale_by_target(s, target, .15) - df["hhpop"] = round_series_match_target(s, target, 0) - df["hhpop"] = df.hhpop.fillna(0) - return df - - -# temporary function to balance hh while some parcels have -# unassigned MAZ -def add_households(df, tothh): - s = scale_by_target(df.tothh, tothh) # , .15 - - df["tothh"] = round_series_match_target(s, tothh, 0) - df["tothh"] = df.tothh.fillna(0) - return df - - -# add employemnt to the dataframe - this uses a regression with -# estimated coefficients done by @mkreilly - - -def add_employment(df, year, tm1_tm2_regional_controls): - - hhs_by_inc = df[["hhincq1", "hhincq2", "hhincq3", "hhincq4"]] - hh_shares = hhs_by_inc.divide(hhs_by_inc.sum(axis=1), axis="index") - - zfi = zone_forecast_inputs() - - empshare = 0.46381 * hh_shares.hhincq1 + 0.49361 * hh_shares.hhincq2 +\ - 0.56938 * hh_shares.hhincq3 + 0.29818 * hh_shares.hhincq4 +\ - zfi.zonal_emp_sh_resid10 - - # I really don't think more than 70% of people should be employed - # in a given zone - this also makes sure that the employed residents - # is less then the total population (after scaling) - if the - # assertion below is triggered you can fix it by reducing this - # .7 even a little bit more - empshare = empshare.fillna(0).clip(.3, .7) - - empres = empshare * df.totpop - - rc = tm1_tm2_regional_controls - target = rc.empres.loc[year] - - empres = scale_by_target(empres, target) - - df["empres"] = round_series_match_target(empres, target, 0) - - # this should really make the assertion below pass, but this now - # only occurs very infrequently - df["empres"] = df[["empres", "totpop"]].min(axis=1) - - # make sure employed residents is less than total residents - assert (df.empres <= df.totpop).all() - - return df - - -# add age categories necessary for the TM -def add_age_categories(df, 
year, tm1_tm2_regional_controls): - zfi = zone_forecast_inputs() - rc = tm1_tm2_regional_controls - - seed_matrix = zfi[["sh_age0004", "sh_age0519", "sh_age2044", - "sh_age4564", "sh_age65p"]].\ - mul(df.totpop, axis='index').as_matrix() - - row_marginals = df.totpop.values - agecols = ["age0004", "age0519", "age2044", "age4564", "age65p"] - col_marginals = rc[agecols].loc[year].values - - target = df.totpop.sum() - col_marginals = scale_by_target(pd.Series(col_marginals), - target).round().astype('int') - - seed_matrix[seed_matrix == 0] = .1 - seed_matrix[row_marginals == 0, :] = 0 - - mat = simple_ipf(seed_matrix, col_marginals, row_marginals) - agedf = pd.DataFrame(mat) - agedf.columns = [col.upper() for col in agecols] - agedf.index = zfi.index - for ind, row in agedf.iterrows(): - target = df.totpop.loc[ind] - row = row.round() - agedf.loc[ind] = round_series_match_target(row, target, 0) - - for col in agedf.columns: - df[col] = agedf[col] - - return df - - -def adjust_hhsize(df, year, rdf, total_hh): - col_marginals = (rdf.loc[rdf.year == year, - ['shrs1', 'shrs2', 'shrs3', - 'shrs4']] * total_hh).values[0] - row_marginals = df.hh.fillna(0).values - seed_matrix = np.round(df[['hh_size_1', 'hh_size_2', - 'hh_size_3', 'hh_size_4_plus']]).as_matrix() - - target = df.hh.sum() - col_marginals = scale_by_target(col_marginals, - target).round().astype('int') - - seed_matrix[seed_matrix == 0] = .1 - seed_matrix[row_marginals == 0, :] = 0 - - mat = simple_ipf(seed_matrix, col_marginals, row_marginals) - hhsizedf = pd.DataFrame(mat) - - hhsizedf.columns = ['hh_size_1', 'hh_size_2', - 'hh_size_3', 'hh_size_4_plus'] - hhsizedf.index = df.index - for ind, row in hhsizedf.iterrows(): - target = df.hh.loc[ind] - row = row.round() - hhsizedf.loc[ind] = round_series_match_target(row, target, 0) - - for col in hhsizedf.columns: - df[col] = hhsizedf[col] - - return df - - -def adjust_hhwkrs(df, year, rdf, total_hh): - col_marginals = (rdf.loc[rdf.year == year, - ['shrw0', 'shrw1', 'shrw2', - 'shrw3']] * total_hh).values[0] - row_marginals = df.hh.fillna(0).values - seed_matrix = np.round(df[['hh_wrks_0', 'hh_wrks_1', - 'hh_wrks_2', 'hh_wrks_3_plus']]).as_matrix() - - target = df.hh.sum() - col_marginals = scale_by_target(col_marginals, - target).round().astype('int') - - seed_matrix[seed_matrix == 0] = .1 - seed_matrix[row_marginals == 0, :] = 0 - - mat = simple_ipf(seed_matrix, col_marginals, row_marginals) - hhwkrdf = pd.DataFrame(mat) - - hhwkrdf.columns = ['hh_wrks_0', 'hh_wrks_1', 'hh_wrks_2', 'hh_wrks_3_plus'] - hhwkrdf.index = df.index - for ind, row in hhwkrdf.iterrows(): - target = df.hh.loc[ind] - row = row.round() - hhwkrdf.loc[ind] = round_series_match_target(row, target, 0) - - for col in hhwkrdf.columns: - df[col] = hhwkrdf[col] - - return df - - -def adjust_page(df, year, tm1_tm2_regional_controls): - rc = tm1_tm2_regional_controls - rc['age0019'] = rc.age0004 + rc.age0519 - col_marginals = rc.loc[year, - ['age0019', 'age2044', 'age4564', - 'age65p']] - row_marginals = df['hhpop'].fillna(0).values - seed_matrix = np.round(df[['pers_age_00_19', 'pers_age_20_34', - 'pers_age_35_64', - 'pers_age_65_plus']]).as_matrix() - - target = df['hhpop'].sum() - col_marginals = scale_by_target(col_marginals, - target).round().astype('int') - - seed_matrix[seed_matrix == 0] = .1 - seed_matrix[row_marginals == 0, :] = 0 - - mat = simple_ipf(seed_matrix, col_marginals, row_marginals) - pagedf = pd.DataFrame(mat) - - pagedf.columns = ['pers_age_00_19', 'pers_age_20_34', - 'pers_age_35_64', 
'pers_age_65_plus'] - pagedf.index = df.index - for ind, row in pagedf.iterrows(): - target = np.round(df['hhpop'].loc[ind]) - row = row.round() - pagedf.loc[ind] = round_series_match_target(row, target, 0) - - for col in pagedf.columns: - df[col] = pagedf[col] - - return df - - -def adjust_hhkids(df, year, rdf, total_hh): - col_marginals = (rdf.loc[rdf.year == year, - ['shrn', 'shry']] * total_hh).values[0] - row_marginals = df.hh.fillna(0).values - seed_matrix = np.round(df[['hh_kids_no', 'hh_kids_yes']]).as_matrix() - - target = df.hh.sum() - col_marginals = scale_by_target(col_marginals, - target).round().astype('int') - - seed_matrix[seed_matrix == 0] = .1 - seed_matrix[row_marginals == 0, :] = 0 - - mat = simple_ipf(seed_matrix, col_marginals, row_marginals) - hhkidsdf = pd.DataFrame(mat) - - hhkidsdf.columns = ['hh_kids_no', 'hh_kids_yes'] - hhkidsdf.index = df.index - for ind, row in hhkidsdf.iterrows(): - target = np.round(df.hh.loc[ind]) - row = row.round() - hhkidsdf.loc[ind] = round_series_match_target(row, target, 0) - - for col in hhkidsdf.columns: - df[col] = hhkidsdf[col] - - return df - - -@orca.step() -def hazards_slr_summary(run_setup, run_number, year, households, jobs, parcels): - - if run_setup['run_slr']: - - destroy_parcels = orca.get_table("destroy_parcels") - if len(destroy_parcels) > 0: - - def write(s): - # print s - f.write(s + "\n\n") - - f = open(os.path.join(orca.get_injectable("outputs_dir"), "run%d_hazards_slr_%d.log" % - (run_number, year)), "w") - - n = len(destroy_parcels) - write("Number of impacted parcels = %d" % n) - - try: - slr_demolish_cum = orca.get_table("slr_demolish_cum").to_frame() - except Exception as e: - slr_demolish_cum = pd.DataFrame() - slr_demolish = orca.get_table("slr_demolish").to_frame() - slr_demolish_cum = slr_demolish.append(slr_demolish_cum) - orca.add_table("slr_demolish_cum", slr_demolish_cum) - - n = slr_demolish_cum['residential_units'].sum() - write("Number of impacted residential units = %d" % n) - n = slr_demolish_cum['building_sqft'].sum() - write("Number of impacted building sqft = %d" % n) - - # income quartile counts - try: - hh_unplaced_slr_cum = \ - orca.get_table("hh_unplaced_slr_cum").to_frame() - except Exception as e: - hh_unplaced_slr_cum = pd.DataFrame() - hh_unplaced_slr = orca.get_injectable("hh_unplaced_slr") - hh_unplaced_slr_cum = hh_unplaced_slr.append(hh_unplaced_slr_cum) - orca.add_table("hh_unplaced_slr_cum", hh_unplaced_slr_cum) - - write("Number of impacted households by type") - hs = pd.DataFrame(index=[0]) - hs['hhincq1'] = \ - (hh_unplaced_slr_cum["base_income_quartile"] == 1).sum() - hs['hhincq2'] = \ - (hh_unplaced_slr_cum["base_income_quartile"] == 2).sum() - hs['hhincq3'] = \ - (hh_unplaced_slr_cum["base_income_quartile"] == 3).sum() - hs['hhincq4'] = \ - (hh_unplaced_slr_cum["base_income_quartile"] == 4).sum() - hs.to_string(f, index=False) - - write("") - - # employees by sector - try: - jobs_unplaced_slr_cum = \ - orca.get_table("jobs_unplaced_slr_cum").to_frame() - except Exception as e: - jobs_unplaced_slr_cum = pd.DataFrame() - jobs_unplaced_slr = orca.get_injectable("jobs_unplaced_slr") - jobs_unplaced_slr_cum = jobs_unplaced_slr.append(jobs_unplaced_slr_cum) - orca.add_table("jobs_unplaced_slr_cum", jobs_unplaced_slr_cum) - - write("Number of impacted jobs by sector") - js = pd.DataFrame(index=[0]) - js['agrempn'] = (jobs_unplaced_slr_cum["empsix"] == 'AGREMPN').sum() - js['mwtempn'] = (jobs_unplaced_slr_cum["empsix"] == 'MWTEMPN').sum() - js['retempn'] = 
(jobs_unplaced_slr_cum["empsix"] == 'RETEMPN').sum() - js['fpsempn'] = (jobs_unplaced_slr_cum["empsix"] == 'FPSEMPN').sum() - js['herempn'] = (jobs_unplaced_slr_cum["empsix"] == 'HEREMPN').sum() - js['othempn'] = (jobs_unplaced_slr_cum["empsix"] == 'OTHEMPN').sum() - js.to_string(f, index=False) - - f.close() - - slr_demolish.to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_hazards_slr_buildings_%d.csv" - % (run_number, year))) - - -@orca.step() -def hazards_eq_summary(run_setup, run_number, year, households, jobs, parcels, buildings, final_year): - - if run_setup['run_eq']: - if year == 2035: - - f = open(os.path.join(orca.get_injectable("outputs_dir"), "run%d_hazards_eq_%d.log" % - (run_number, year)), "w") - - def write(s): - # print s - f.write(s + "\n\n") - - write("Number of buildings with earthquake buildings codes") - code = orca.get_injectable("code") - code_counts = [[x, code.count(x)] for x in set(code)] - code_counts_df = pd.DataFrame(code_counts, - columns=['building_code', 'count']) - code_counts_df.to_string(f, index=False) - - write("") - - write("Number of buildings with fragility codes") - fragilities = orca.get_injectable("fragilities") - fragility_counts = [[x, fragilities.count(x)] - for x in set(fragilities)] - fragility_counts_df = pd.DataFrame(fragility_counts, - columns=['fragility_code', 'count']) - fragility_counts_df.to_string(f, index=False) - - write("") - - # buildings counts - eq_buildings = orca.get_injectable("eq_buildings") - n = len(eq_buildings) - write("Total number of buildings destroyed = %d" % n) - existing_buildings = orca.get_injectable("existing_buildings") - n = len(existing_buildings) - write("Number of existing buildings destroyed = %d" % n) - new_buildings = orca.get_injectable("new_buildings") - n = len(new_buildings) - write("Number of new buildings destroyed = %d" % n) - fire_buildings = orca.get_injectable("fire_buildings") - n = len(fire_buildings) - write("Number of buildings destroyed by fire = %d" % n) - - eq_demolish = orca.get_table("eq_demolish") - n = eq_demolish['residential_units'].sum() - write("Number of impacted residential units = %d" % n) - n = eq_demolish['building_sqft'].sum() - write("Number of impacted building sqft = %d" % n) - - # income quartile counts - write("Number of impacted households by type") - hh_unplaced_eq = orca.get_injectable("hh_unplaced_eq") - hh_summary = pd.DataFrame(index=[0]) - hh_summary['hhincq1'] = \ - (hh_unplaced_eq["base_income_quartile"] == 1).sum() - hh_summary['hhincq2'] = \ - (hh_unplaced_eq["base_income_quartile"] == 2).sum() - hh_summary['hhincq3'] = \ - (hh_unplaced_eq["base_income_quartile"] == 3).sum() - hh_summary['hhincq4'] = \ - (hh_unplaced_eq["base_income_quartile"] == 4).sum() - hh_summary.to_string(f, index=False) - - write("") - - # employees by sector - write("Number of impacted jobs by sector") - jobs_unplaced_eq = orca.get_injectable("jobs_unplaced_eq") - jobs_summary = pd.DataFrame(index=[0]) - jobs_summary['agrempn'] = \ - (jobs_unplaced_eq["empsix"] == 'AGREMPN').sum() - jobs_summary['mwtempn'] = \ - (jobs_unplaced_eq["empsix"] == 'MWTEMPN').sum() - jobs_summary['retempn'] = \ - (jobs_unplaced_eq["empsix"] == 'RETEMPN').sum() - jobs_summary['fpsempn'] = \ - (jobs_unplaced_eq["empsix"] == 'FPSEMPN').sum() - jobs_summary['herempn'] = \ - (jobs_unplaced_eq["empsix"] == 'HEREMPN').sum() - jobs_summary['othempn'] = \ - (jobs_unplaced_eq["empsix"] == 'OTHEMPN').sum() - jobs_summary.to_string(f, index=False) - - f.close() - - # print out demolished buildings - 
eq_demolish = eq_demolish.to_frame() - eq_demolish_taz = misc.reindex(parcels.zone_id, - eq_demolish.parcel_id) - eq_demolish['taz'] = eq_demolish_taz - eq_demolish['count'] = 1 - eq_demolish = eq_demolish.drop(['parcel_id', 'year_built', - 'redfin_sale_year'], axis=1) - eq_demolish = eq_demolish.groupby(['taz']).sum() - eq_demolish.to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_hazards_eq_demolish_buildings_%d.csv" - % (run_number, year))) - - # print out retrofit buildings that were saved - if run_setup['eq_mitigation']: - retrofit_bldgs_tot = orca.get_table("retrofit_bldgs_tot") - retrofit_bldgs_tot = retrofit_bldgs_tot.to_frame() - retrofit_bldgs_tot_taz = misc.reindex(parcels.zone_id, - retrofit_bldgs_tot.geoid) - retrofit_bldgs_tot['taz'] = retrofit_bldgs_tot_taz - retrofit_bldgs_tot['count'] = 1 - retrofit_bldgs_tot = retrofit_bldgs_tot[[ - 'taz', 'residential_units', 'residential_sqft', - 'non_residential_sqft', 'building_sqft', 'stories', - 'redfin_sale_price', 'non_residential_rent', - 'deed_restricted_units', 'residential_price', 'count']] - retrofit_bldgs_tot = retrofit_bldgs_tot.groupby(['taz']).sum() - retrofit_bldgs_tot.\ - to_csv(os.path.join( - orca.get_injectable("outputs_dir"), "run%d_hazards_eq_retrofit_buildings_%d.csv" - % (run_number, year))) - - # print out buildings in 2030, 2035, and 2050 so Horizon team can compare - # building inventory by TAZ - if year in [2030, 2035, final_year] and eq: - buildings = buildings.to_frame() - buildings_taz = misc.reindex(parcels.zone_id, - buildings.parcel_id) - buildings['taz'] = buildings_taz - buildings['count'] = 1 - buildings = buildings[['taz', 'count', 'residential_units', - 'residential_sqft', 'non_residential_sqft', - 'building_sqft', 'stories', 'redfin_sale_price', - 'non_residential_rent', 'deed_restricted_units', - 'residential_price']] - buildings = buildings.groupby(['taz']).sum() - buildings.to_csv(os.path.join(orca.get_injectable("outputs_dir"), - "run%d_hazards_eq_buildings_list_%d.csv" - % (run_number, year))) - - -@orca.step() -def slack_report(year, base_year, slack_enabled, run_number, devproj_len, devproj_len_geomid, devproj_len_proc): - - if slack_enabled: - from slacker import Slacker - import socket - slack = Slacker(os.environ["SLACK_TOKEN"]) - host = socket.gethostname() - - if year == base_year: - dropped_devproj_geomid = devproj_len - devproj_len_geomid - dropped_devproj_proc = devproj_len_geomid - devproj_len_proc - slack.chat.post_message( - '#urbansim_sim_update', - 'Development projects for run %d on %s: %d to start, ' - '%d dropped by geom_id check, ' - '%d dropped by processing' - % (run_number, host, devproj_len, dropped_devproj_geomid, dropped_devproj_proc), as_user=True) - - unplaced_hh = orca.get_injectable("unplaced_hh") - if unplaced_hh > 0: - slack.chat.post_message( - '#urbansim_sim_update', - 'WARNING: unplaced households in %d for run %d on %s' - % (year, run_number, host), as_user=True) diff --git a/baus/summaries/affordable_housing_summaries.py b/baus/summaries/affordable_housing_summaries.py index 4bcb3cbaa..c38a4f338 100644 --- a/baus/summaries/affordable_housing_summaries.py +++ b/baus/summaries/affordable_housing_summaries.py @@ -6,14 +6,14 @@ from baus import datasources @orca.step() -def deed_restricted_units_summary(run_name, parcels, buildings, year, initial_summary_year, final_year, superdistricts_geography): +def deed_restricted_units_summary(run_name, parcels, buildings, year, initial_year, final_year, travel_model_zones): - if year != 
initial_summary_year and year != final_year: + if year != initial_year and year != final_year: return - # get buldings table and geography columns to tally deed restricted (dr) units - buildings = orca.merge_tables('buildings', [parcels, buildings], - columns=['juris', 'superdistrict', 'county', 'residential_units', + # get buildings table and geography columns to tally deed restricted (dr) units + buildings = orca.merge_tables('buildings', [parcels, buildings, travel_model_zones], + columns=['jurisdiction', 'superdistrict', 'county','residential_units', 'deed_restricted_units', 'preserved_units', 'inclusionary_units', 'subsidized_units', 'source']) # use the buildings "source" column to get dr units by source @@ -53,7 +53,7 @@ def deed_restricted_units_summary(run_name, parcels, buildings, year, initial_su "affordable_housing_summaries/{}_region_dr_summary_{}.csv").format(run_name, year)) #### geographic deed restricted units summary #### - geographies = ['juris', 'superdistrict', 'county'] + geographies = ['jurisdiction', 'superdistrict', 'county'] for geography in geographies: @@ -65,8 +65,8 @@ def deed_restricted_units_summary(run_name, parcels, buildings, year, initial_su # add superdistrict name if geography == 'superdistrict': - superdistricts_geography = superdistricts_geography.to_frame() - summary_table = summary_table.merge(superdistricts_geography[['name']], left_index=True, right_index=True) + tm_zones = travel_model_zones.to_frame() + summary_table = summary_table.merge(tm_zones[['superdistrict_name']], left_index=True, right_index=True) # add total dr units summary_table['total_dr_units'] = buildings.groupby(geography)["deed_restricted_units"].sum() @@ -90,7 +90,7 @@ def deed_restricted_units_summary(run_name, parcels, buildings, year, initial_su @orca.step() -def deed_restricted_units_growth_summary(year, initial_summary_year, final_year, run_name): +def deed_restricted_units_growth_summary(year, initial_year, final_year, run_name): if year != final_year: return @@ -100,10 +100,10 @@ def deed_restricted_units_growth_summary(year, initial_summary_year, final_year, for geography in geographies: # use 2015 as the base year - year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "affordable_housing_summaries/%s_%s_dr_summary_%d.csv" % (run_name, geography, initial_summary_year))) + year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "affordable_housing_summaries/%s_%s_dr_summary_%d.csv" % (run_name, geography, initial_year))) year2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "affordable_housing_summaries/%s_%s_dr_summary_%d.csv" % (run_name, geography, final_year))) - dr_growth = year1.merge(year2, on=geography, suffixes=("_"+str(initial_summary_year), "_"+str(final_year))) + dr_growth = year1.merge(year2, on=geography, suffixes=("_"+str(initial_year), "_"+str(final_year))) dr_growth["run_name"] = run_name @@ -112,15 +112,15 @@ def deed_restricted_units_growth_summary(year, initial_summary_year, final_year, for col in columns: # growth in units - dr_growth[col+'_growth'] = dr_growth[col+"_"+str(final_year)] - dr_growth[col+"_"+str(initial_summary_year)] + dr_growth[col+'_growth'] = dr_growth[col+"_"+str(final_year)] - dr_growth[col+"_"+str(initial_year)] # change in the regional share of units in the geography - dr_growth[col+"_"+str(initial_summary_year)+"_share"] = (round(dr_growth[col+"_"+str(initial_summary_year)] / - dr_growth[col+"_"+str(initial_summary_year)].sum(), 2)) + dr_growth[col+"_"+str(initial_year)+"_share"] = 
(round(dr_growth[col+"_"+str(initial_year)] / + dr_growth[col+"_"+str(initial_year)].sum(), 2)) dr_growth[col+"_"+str(final_year)+"_share"] = (round(dr_growth[col+"_"+str(final_year)] / dr_growth[col+"_"+str(final_year)].sum(), 2) ) dr_growth[col+'_share_change'] = (dr_growth[col+"_"+str(final_year)+"_share"] - - dr_growth[col+"_"+str(initial_summary_year)+"_share"]) + dr_growth[col+"_"+str(initial_year)+"_share"]) dr_growth = dr_growth.fillna(0) dr_growth.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "affordable_housing_summaries/{}_{}_dr_growth.csv").format(run_name, geography)) \ No newline at end of file diff --git a/baus/summaries/core_summaries.py b/baus/summaries/core_summaries.py index cf31ceadd..faae13eaa 100644 --- a/baus/summaries/core_summaries.py +++ b/baus/summaries/core_summaries.py @@ -6,16 +6,18 @@ @orca.step() -def parcel_summary(run_name, parcels, buildings, households, jobs, year, initial_summary_year, interim_summary_year, final_year): +def parcel_summary(run_name, parcels, buildings, households, jobs, year, initial_year, interim_summary_year, final_year): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return + + df = parcels.to_frame(["parcel_id"]) - df = parcels.to_frame(["geom_id", "x", "y"]) # add building data for parcels building_df = orca.merge_tables('buildings', [parcels, buildings], columns=['parcel_id', 'residential_units', 'deed_restricted_units', 'preserved_units', 'inclusionary_units', 'subsidized_units', 'non_residential_sqft']) + for col in building_df.columns: if col == 'parcel_id': continue @@ -38,19 +40,17 @@ def parcel_summary(run_name, parcels, buildings, households, jobs, year, initial @orca.step() -def parcel_growth_summary(year, run_name, initial_summary_year, final_year): +def parcel_growth_summary(year, run_name, initial_year, final_year): if year != final_year: return df1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "core_summaries/%s_parcel_summary_%d.csv" % - (run_name, initial_summary_year)), index_col="parcel_id") + (run_name, initial_year)), index_col="parcel_id") df2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "core_summaries/%s_parcel_summary_%d.csv" % (run_name, final_year)), index_col="parcel_id") for col in df1.columns: - if col in ["geom_id", "x", "y"]: - continue # fill na with 0 otherwise it drops the parcel data during subtraction df1[col].fillna(0, inplace=True) @@ -62,9 +62,9 @@ def parcel_growth_summary(year, run_name, initial_summary_year, final_year): @orca.step() -def building_summary(run_name, parcels, buildings, year, initial_summary_year, final_year, interim_summary_year): +def building_summary(run_name, parcels, buildings, year, initial_year, final_year, interim_summary_year): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return df = orca.merge_tables('buildings', diff --git a/baus/summaries/geographic_summaries.py b/baus/summaries/geographic_summaries.py index 6960f6935..a19c521c0 100644 --- a/baus/summaries/geographic_summaries.py +++ b/baus/summaries/geographic_summaries.py @@ -7,20 +7,20 @@ @orca.step() -def geographic_summary(parcels, households, jobs, buildings, year, superdistricts_geography, - initial_summary_year, interim_summary_year, final_year, run_name): +def geographic_summary(parcels, households, jobs, buildings, year, travel_model_zones, + initial_year, 
interim_summary_year, final_year, run_name): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return households_df = orca.merge_tables('households', [parcels, buildings, households], - columns=['juris', 'superdistrict', 'county', 'subregion', 'base_income_quartile',]) + columns=['jurisdiction', 'superdistrict', 'county', 'subregion', 'base_income_quartile',]) jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs], - columns=['juris', 'superdistrict', 'county', 'subregion', 'empsix']) + columns=['jurisdiction', 'superdistrict', 'county', 'subregion', 'empsix']) buildings_df = orca.merge_tables('buildings', [parcels, buildings], - columns=['juris', 'superdistrict', 'county', 'subregion', 'building_type', + columns=['jurisdiction', 'superdistrict', 'county', 'subregion', 'building_type', 'residential_units', 'deed_restricted_units', 'non_residential_sqft']) #### summarize regional results #### @@ -48,21 +48,15 @@ def geographic_summary(parcels, households, jobs, buildings, year, superdistrict region.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "geographic_summaries/{}_region_summary_{}.csv").format(run_name, year)) #### summarize by sub-regional geography #### - geographies = ['juris', 'superdistrict', 'county', 'subregion'] + geographies = ['jurisdiction', 'superdistrict', 'county', 'subregion'] for geography in geographies: - # remove rows with null geography- seen with "county" - buildings_df = buildings_df[~pd.isna(buildings_df[geography])] - households_df = households_df[~pd.isna(households_df[geography])] - jobs_df = jobs_df[~pd.isna(jobs_df[geography])] - summary_table = pd.DataFrame(index=buildings_df[geography].unique()) # add superdistrict name if geography == 'superdistrict': - superdistricts_geography = superdistricts_geography.to_frame() - summary_table = summary_table.merge(superdistricts_geography[['name']], left_index=True, right_index=True) + summary_table["superdistrict_name"] = travel_model_zones.to_frame().groupby("superdistrict").superdistrict_name.first() # households summary_table['tothh'] = households_df.groupby(geography).size() @@ -93,25 +87,25 @@ def geographic_summary(parcels, households, jobs, buildings, year, superdistrict @orca.step() -def geographic_growth_summary(year, final_year, initial_summary_year, run_name): +def geographic_growth_summary(year, final_year, initial_year, run_name): if year != final_year: return - geographies = ['region', 'juris', 'superdistrict', 'county', 'subregion'] + geographies = ['region', 'jurisdiction', 'superdistrict', 'county', 'subregion'] for geography in geographies: # use 2015 as the base year - year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "geographic_summaries/%s_%s_summary_%d.csv" % (run_name, geography, initial_summary_year))) + year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "geographic_summaries/%s_%s_summary_%d.csv" % (run_name, geography, initial_year))) year2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "geographic_summaries/%s_%s_summary_%d.csv" % (run_name, geography, final_year))) - geog_growth = year1.merge(year2, on=geography, suffixes=("_"+str(initial_summary_year), "_"+str(final_year))) + geog_growth = year1.merge(year2, on=geography, suffixes=("_"+str(initial_year), "_"+str(final_year))) geog_growth["run_name"] = run_name if geography == 'superdistrict': - geog_growth = geog_growth.rename(columns={"name_"+(str(initial_summary_year)): 
"name"}) + geog_growth = geog_growth.rename(columns={"name_"+(str(initial_year)): "name"}) geog_growth = geog_growth.drop(columns=["name_"+(str(final_year))]) columns = ['tothh', 'totemp', 'residential_units', 'deed_restricted_units', 'non_residential_sqft'] @@ -119,24 +113,24 @@ def geographic_growth_summary(year, final_year, initial_summary_year, run_name): for col in columns: # growth in households/jobs/etc. geog_growth[col+"_growth"] = (geog_growth[col+"_"+str(final_year)] - - geog_growth[col+"_"+str(initial_summary_year)]) + geog_growth[col+"_"+str(initial_year)]) # percent change in geography's households/jobs/etc. geog_growth[col+'_pct_change'] = (round((geog_growth[col+"_"+str(final_year)] / - geog_growth[col+"_"+str(initial_summary_year)] - 1) * 100, 2)) + geog_growth[col+"_"+str(initial_year)] - 1) * 100, 2)) # percent geography's growth of households/jobs/etc. of all regional growth in households/jobs/etc. geog_growth[col+'_pct_of_regional_growth'] = (round(((geog_growth[col+"_growth"]) / (geog_growth[col+"_"+str(final_year)].sum() - - geog_growth[col+"_"+str(initial_summary_year)].sum())) * 100, 2)) + geog_growth[col+"_"+str(initial_year)].sum())) * 100, 2)) # change in the regional share of households/jobs/etc. in the geography - geog_growth[col+"_"+str(initial_summary_year)+"_regional_share"] = (round(geog_growth[col+"_"+str(initial_summary_year)] / - geog_growth[col+"_"+str(initial_summary_year)].sum(), 2)) + geog_growth[col+"_"+str(initial_year)+"_regional_share"] = (round(geog_growth[col+"_"+str(initial_year)] / + geog_growth[col+"_"+str(initial_year)].sum(), 2)) geog_growth[col+"_"+str(final_year)+"_regional_share"] = (round(geog_growth[col+"_"+str(final_year)] / geog_growth[col+"_"+str(final_year)].sum(), 2)) geog_growth[col+'_regional_share_change'] = (geog_growth[col+"_"+str(final_year)+"_regional_share"] - - geog_growth[col+"_"+str(initial_summary_year)+"_regional_share"]) + geog_growth[col+"_"+str(initial_year)+"_regional_share"]) geog_growth = geog_growth.fillna(0) geog_growth.to_csv(os.path.join(orca.get_injectable("outputs_dir"), diff --git a/baus/summaries/metrics.py b/baus/summaries/metrics.py index c58cfa84b..50fe2ec5a 100644 --- a/baus/summaries/metrics.py +++ b/baus/summaries/metrics.py @@ -7,15 +7,16 @@ @orca.step() -def growth_geography_metrics(parcels, parcels_geography, buildings, households, jobs, year, - initial_summary_year, final_year, run_name): +def growth_geography_metrics(parcels, growth_geographies, buildings, households, jobs, year, + initial_year, final_year, run_name): - if year != initial_summary_year and year != final_year: + + if year != initial_year and year != final_year: return - households_df = orca.merge_tables('households', [parcels, buildings, households, parcels_geography], - columns=['income', 'base_income_quartile', 'gg_id', 'pda_id', 'tra_id', 'sesit_id']) - jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs, parcels_geography], + households_df = orca.merge_tables('households', [parcels, buildings, households, growth_geographies], + columns=['base_income_quartile', 'gg_id', 'pda_id', 'tra_id', 'sesit_id']) + jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs, growth_geographies], columns=['empsix', 'gg_id', 'pda_id', 'tra_id', 'sesit_id']) # intialize growth geographies summary table @@ -44,7 +45,7 @@ def growth_geography_metrics(parcels, parcels_geography, buildings, households, if year != final_year: return - year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), 
"metrics/%s_growth_geography_summary_%d.csv" % (run_name, initial_summary_year))) + year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "metrics/%s_growth_geography_summary_%d.csv" % (run_name, initial_year))) year2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "metrics/%s_growth_geography_summary_%d.csv" % (run_name, final_year))) growth_geog_growth = pd.DataFrame(index=[0]) @@ -56,7 +57,7 @@ def growth_geography_metrics(parcels, parcels_geography, buildings, households, 'gg_jobs', 'pda_jobs', 'gg_non_pda_jobs', 'hra_jobs', 'tra_jobs', 'hra_tra_jobs'] for col in columns: - growth_geog_growth[col+"_"+str(initial_summary_year)] = year1[col] + growth_geog_growth[col+"_"+str(initial_year)] = year1[col] growth_geog_growth[col+"_"+str(final_year)] = year2[col] # growth in units growth_geog_growth[col+'_growth'] = year2[col] - year1[col] @@ -74,12 +75,12 @@ def growth_geography_metrics(parcels, parcels_geography, buildings, households, @orca.step() -def deed_restricted_units_metrics(parcels, buildings, year, initial_summary_year, final_year, parcels_geography, run_name): +def deed_restricted_units_metrics(parcels, buildings, year, initial_year, final_year, growth_geographies, run_name): - if year != initial_summary_year and year != final_year: + if year != initial_year and year != final_year: return - buildings_df = orca.merge_tables('buildings', [parcels, buildings, parcels_geography], + buildings_df = orca.merge_tables('buildings', [parcels, buildings, growth_geographies], columns=['residential_units', 'deed_restricted_units', 'gg_id', 'pda_id', 'tra_id', 'sesit_id', 'coc_id']) @@ -101,7 +102,7 @@ def deed_restricted_units_metrics(parcels, buildings, year, initial_summary_year if year != final_year: return - dr_units_summary_y1 = orca.get_table(("dr_units_summary_{}").format(initial_summary_year)).to_frame() + dr_units_summary_y1 = orca.get_table(("dr_units_summary_{}").format(initial_year)).to_frame() dr_units_summary_y2 = orca.get_table(("dr_units_summary_{}").format(final_year)).to_frame() dr_units_growth = pd.DataFrame(index=['total']) @@ -119,13 +120,13 @@ def deed_restricted_units_metrics(parcels, buildings, year, initial_summary_year @orca.step() -def household_income_metrics(year, initial_summary_year, final_year, parcels, buildings, households, - parcels_geography, run_name): +def household_income_metrics(year, initial_year, final_year, parcels, buildings, households, + growth_geographies, run_name): - if year != initial_summary_year and year != final_year: + if year != initial_year and year != final_year: return - hh_df = orca.merge_tables('households', [parcels, buildings, households, parcels_geography], + hh_df = orca.merge_tables('households', [parcels, buildings, households, growth_geographies], columns=['base_income_quartile', 'gg_id', 'pda_id', 'tra_id', 'sesit_id', 'coc_id']) ### low income households ### @@ -161,10 +162,10 @@ def household_income_metrics(year, initial_summary_year, final_year, parcels, bu "metrics/{}_household_income_metrics_{}.csv").format(run_name, year)) @orca.step() -def equity_metrics(year, initial_summary_year, final_year, parcels, buildings, households, parcel_tract_crosswalk, +def equity_metrics(year, initial_year, final_year, parcels, buildings, households, parcel_tract_crosswalk, displacement_risk_tracts, coc_tracts, run_name): - if year != initial_summary_year and year != final_year: + if year != initial_year and year != final_year: return hh_df = orca.merge_tables('households', [parcels, buildings, households], 
columns=['base_income_quartile']) @@ -198,12 +199,12 @@ def equity_metrics(year, initial_summary_year, final_year, parcels, buildings, h if year != final_year: return - dis_tract_hhs_y1 = orca.get_table(("dis_tract_hhs_{}").format(initial_summary_year)).to_frame() + dis_tract_hhs_y1 = orca.get_table(("dis_tract_hhs_{}").format(initial_year)).to_frame() dis_tract_hhs_y2 = orca.get_table(("dis_tract_hhs_{}").format(final_year)).to_frame() dis_tract_hhs_change = dis_tract_hhs_y1.merge(dis_tract_hhs_y2, left_index=True, right_index=True, suffixes=('_y1', '_y2')) dis_tract_hhs_change.name = 'dis_tracts' - coc_tract_hhs_y1 = orca.get_table(("coc_tract_hhs_{}").format(initial_summary_year)).to_frame() + coc_tract_hhs_y1 = orca.get_table(("coc_tract_hhs_{}").format(initial_year)).to_frame() coc_tract_hhs_y2 = orca.get_table(("coc_tract_hhs_{}").format(final_year)).to_frame() coc_tract_hhs_change = coc_tract_hhs_y1.merge(coc_tract_hhs_y2, left_index=True, right_index=True, suffixes=('_y1', '_y2')) coc_tract_hhs_change.name = 'coc_tracts' @@ -229,9 +230,9 @@ def equity_metrics(year, initial_summary_year, final_year, parcels, buildings, h @orca.step() -def jobs_housing_metrics(parcels, buildings, jobs, households, year, initial_summary_year, final_year, run_name): +def jobs_housing_metrics(parcels, buildings, jobs, households, year, initial_year, final_year, run_name): - if year == initial_summary_year or year == final_year: + if year == initial_year or year == final_year: jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs], columns=['empsix', 'county']) @@ -254,11 +255,11 @@ def jobs_housing_metrics(parcels, buildings, jobs, households, year, initial_sum @orca.step() -def jobs_metrics(year, parcels, buildings, jobs, parcels_geography, initial_summary_year, final_year, run_name): +def jobs_metrics(year, parcels, buildings, jobs, growth_geographies, initial_year, final_year, run_name): - if year == initial_summary_year or year == final_year: + if year == initial_year or year == final_year: - jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs, parcels_geography], columns=['empsix', 'ppa_id']) + jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs, growth_geographies], columns=['empsix', 'ppa_id']) jobs_summary = pd.DataFrame(index=['total']) jobs_summary['totemp'] = jobs_df.size @@ -269,7 +270,7 @@ def jobs_metrics(year, parcels, buildings, jobs, parcels_geography, initial_summ if year == final_year: # now calculate growth metrics - jobs_summary_y1 = orca.get_table(("jobs_summary_{}").format(initial_summary_year)).to_frame() + jobs_summary_y1 = orca.get_table(("jobs_summary_{}").format(initial_year)).to_frame() jobs_summary_y2 = orca.get_table(("jobs_summary_{}").format(final_year)).to_frame() # job growth @@ -285,7 +286,7 @@ def jobs_metrics(year, parcels, buildings, jobs, parcels_geography, initial_summ @orca.step() -def slr_metrics(run_setup, parcels, buildings, parcels_geography, slr_parcel_inundation, households, year, final_year, run_name): +def slr_metrics(run_setup, parcels, buildings, slr_inundation, households, year, final_year, run_name): # TODO (long-term): reconsider whether "2050 affected households" makes sense for the metric, # since there should be no HH on these parcels @@ -297,7 +298,7 @@ def slr_metrics(run_setup, parcels, buildings, parcels_geography, slr_parcel_inu return hh_df = orca.merge_tables('households', [parcels, buildings, households]) - hh_df = hh_df.merge(slr_parcel_inundation.to_frame(), on='parcel_id', how='left') + hh_df = 
hh_df.merge(slr_inundation.to_frame(), on='parcel_id', how='left')
 
     slr_metrics = pd.DataFrame(index=['total'])
     # households protected and unprotected from sea level rise
@@ -326,7 +327,7 @@ def slr_metrics(run_setup, parcels, buildings, parcels_geography, slr_parcel_inu
 
 
 @orca.step()
-def earthquake_metrics(run_setup, parcels_geography, buildings_w_eq_codes, eq_retrofit_lookup, households, year, final_year, run_name):
+def earthquake_metrics(run_setup, growth_geographies, buildings_w_eq_codes, eq_retrofit_lookup, households, year, final_year, run_name):
 
     if not run_setup['run_eq']:
         return
@@ -337,13 +338,13 @@ def earthquake_metrics(run_setup, parcels_geography, buildings_w_eq_codes, eq_re
 
     # TODO (long-term)- this seems to rely on a buildings -> earthquake code assignment from a certain run (which?)
     # it could easily use the buildings table from the model run instead (existing buildings + stochastic developer buildings)
-    parcels_geography = parcels_geography.to_frame()
+    growth_geographies = growth_geographies.to_frame()
     buildings_w_eq_codes = buildings_w_eq_codes.to_frame()
     eq_retrofit_lookup = eq_retrofit_lookup.to_frame()
 
     # select the buildings that were retrofit based on their eq code
     retrofit_buildings = buildings_w_eq_codes.merge(eq_retrofit_lookup, left_on="earthquake_code", right_on="building_eq_code", how="inner")
-    retrofit_buildings = retrofit_buildings.merge(parcels_geography, on='parcel_id', how='left')
+    retrofit_buildings = retrofit_buildings.merge(growth_geographies, on='parcel_id', how='left')
 
     # get the total cost of the retrofit per buildings based on building's number of units
     retrofit_buildings['cost_retrofit_total'] = retrofit_buildings['residential_units'] * retrofit_buildings['cost_retrofit']
@@ -391,12 +392,12 @@ def wildfire_metrics():
 
 
 @orca.step()
-def greenfield_metrics(buildings, parcels, year, initial_summary_year, final_year, run_name):
+def greenfield_metrics(buildings, parcels, year, initial_year, final_year, run_name):
 
-    if year != initial_summary_year and year != final_year:
+    if year != initial_year and year != final_year:
         return
 
-    # TODO (long-term)- update the urbanized area used, this uses "Urbanize_Footprint" shapefile joined to parcels
+    # TODO (long-term)- update the urbanized area used, this uses "Urbanized_Footprint" shapefile joined to parcels
    # most greenfield occurs in the baseyear here since the shapefile is older than the input data
    # so also update the start year for the metric
     buildings_uf_df = orca.merge_tables('buildings', [parcels, buildings],
@@ -414,10 +415,10 @@ def greenfield_metrics(buildings, parcels, year, initial_summary_yea
 
     # this uses observed data (Vital Signs?)
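+    # note: the 6642 below appears to be a two-year observed total of greenfield
+    # acres, hence the division by 2 to annualize it; the model-based figure that
+    # follows is annualized the same way, dividing net acre growth by the number
+    # of simulation years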
greenfield_metric["annual_greenfield_development_acres_2015"] = 6642/2 # this uses the model calculation - buildings_out_uf_2015 = orca.get_table(("buildings_outside_urban_footprint_{}").format(initial_summary_year)).to_frame() + buildings_out_uf_2015 = orca.get_table(("buildings_outside_urban_footprint_{}").format(initial_year)).to_frame() buildings_out_uf_2050 = orca.get_table(("buildings_outside_urban_footprint_{}").format(final_year)).to_frame() greenfield_metric["annual_greenfield_dev_acres_2050"] = (((buildings_out_uf_2050["acres"].sum() - buildings_out_uf_2015["acres"].sum()) / - (final_year - initial_summary_year))).round(0) + (final_year - initial_year))).round(0) greenfield_metric = greenfield_metric.transpose() greenfield_metric.to_csv(os.path.join(orca.get_injectable("outputs_dir"), "metrics/{}_greenfield_metric.csv").format(run_name)) \ No newline at end of file diff --git a/baus/summaries/travel_model_summaries.py b/baus/summaries/travel_model_summaries.py index d75d37684..4593fe51a 100644 --- a/baus/summaries/travel_model_summaries.py +++ b/baus/summaries/travel_model_summaries.py @@ -220,64 +220,51 @@ def adjust_hhkids(df, year, rdf, total_hh): ###################################################### @orca.step() -def taz1_summary(parcels, households, jobs, buildings, zones, maz, year, base_year_summary_taz, taz_geography, +def taz1_summary(parcels, households, jobs, buildings, travel_model_zones, year, base_year_summary_taz, tm1_taz1_forecast_inputs, tm1_tm2_maz_forecast_inputs, tm1_tm2_regional_demographic_forecast, - tm1_tm2_regional_controls, initial_summary_year, interim_summary_year, final_year, run_name): + tm1_tm2_regional_controls, initial_year, interim_summary_year, final_year, run_name): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return - # (1) add relevant geographies to TAZ summaries - taz_df = pd.DataFrame(index=zones.index) - taz_df["sd"] = taz_geography.superdistrict - taz_df["zone"] = zones.index - taz_df["county"] = taz_geography.county_name - - # create a zone_id for parcels to make sure it can get used in the merge bug mentioned below - parcels = parcels.to_frame() - parcels["zone_id_x"] = parcels.zone_id - orca.add_table('parcels', parcels) - parcels = orca.get_table("parcels") + # (1) add relevant geographies to TAZ1 summaries + taz_df = pd.DataFrame(index=travel_model_zones.to_frame().groupby("taz_tm1").taz_tm1.first()) + taz_df["sd"] = travel_model_zones.to_frame().groupby("taz_tm1").superdistrict.first() + taz_county = travel_model_zones.to_frame().merge(parcels.to_frame(columns=['parcel_id', 'county']), on='parcel_id') + taz_df["county"] = taz_county.groupby("taz_tm1").county.first() # (2) summarize households by TAZ1 - households_df = orca.merge_tables('households', [parcels, buildings, households], - columns=['zone_id', 'zone_id_x', 'base_income_quartile', - 'income', 'persons', 'maz_id']) - # merge_tables returns multiple zone_id_'s, but not the one we need - households_df["zone_id"] = households_df.zone_id_x + households_df = orca.merge_tables('households', [parcels, buildings, households, travel_model_zones], + columns=['base_income_quartile', 'persons', 'taz_tm1', 'maz_tm2']) def gethhcounts(filter): - return households_df.query(filter).groupby('zone_id').size() + return households_df.query(filter).groupby('taz_tm1').size() taz_df["hhincq1"] = gethhcounts("base_income_quartile == 1") taz_df["hhincq2"] = gethhcounts("base_income_quartile == 2") 
taz_df["hhincq3"] = gethhcounts("base_income_quartile == 3") taz_df["hhincq4"] = gethhcounts("base_income_quartile == 4") - taz_df["hhpop"] = households_df.groupby('zone_id').persons.sum() - taz_df["tothh"] = households_df.groupby('zone_id').size() + taz_df["hhpop"] = households_df.groupby('taz_tm1').persons.sum() + taz_df["tothh"] = households_df.groupby('taz_tm1').size() # (3) summarize jobs by TAZ1 - jobs_df = orca.merge_tables( - 'jobs', - [parcels, buildings, jobs], - columns=['zone_id', 'zone_id_x', 'empsix']) - # merge_tables returns multiple zone_id_'s, but not the one we need - jobs_df["zone_id"] = jobs_df.zone_id_x + jobs_df = orca.merge_tables('jobs', [parcels, buildings, jobs, travel_model_zones], columns=['empsix', 'taz_tm1']) def getsectorcounts(sector): - return jobs_df.query("empsix == '%s'" % sector).groupby('zone_id').size() + return jobs_df.query("empsix == '%s'" % sector).groupby('taz_tm1').size() taz_df["agrempn"] = getsectorcounts("AGREMPN") taz_df["fpsempn"] = getsectorcounts("FPSEMPN") taz_df["herempn"] = getsectorcounts("HEREMPN") taz_df["retempn"] = getsectorcounts("RETEMPN") taz_df["mwtempn"] = getsectorcounts("MWTEMPN") taz_df["othempn"] = getsectorcounts("OTHEMPN") - taz_df["totemp"] = jobs_df.groupby('zone_id').size() + taz_df["totemp"] = jobs_df.groupby('taz_tm1').size() # (4) add residenital units by TAZ1 - buildings_df = buildings.to_frame(['zone_id', 'building_type', 'residential_units']) - taz_df["res_units"] = buildings_df.groupby('zone_id').residential_units.sum() - taz_df["mfdu"] = buildings_df.query("building_type == 'HM' or building_type == 'MR'").groupby('zone_id').residential_units.sum() - taz_df["sfdu"] = buildings_df.query("building_type == 'HS' or building_type == 'HT'").groupby('zone_id').residential_units.sum() + buildings_df = orca.merge_tables('buildings', [parcels, buildings, travel_model_zones], + columns=['residential_units', 'building_type', 'taz_tm1']) + taz_df["res_units"] = buildings_df.groupby('taz_tm1').residential_units.sum() + taz_df["mfdu"] = buildings_df.query("building_type == 'HM' or building_type == 'MR'").groupby('taz_tm1').residential_units.sum() + taz_df["sfdu"] = buildings_df.query("building_type == 'HS' or building_type == 'HT'").groupby('taz_tm1').residential_units.sum() # (5) add variables from the taz forecast inputs zfi = tm1_taz1_forecast_inputs.to_frame() @@ -322,11 +309,9 @@ def getsectorcounts(sector): # (9) use maz forecast inputs to forecast group quarters mazi = tm1_tm2_maz_forecast_inputs.to_frame() mazi_yr = str(year)[2:] - maz = maz.to_frame(['taz1454']) - # fix for maz_id issue - households_df.maz_id = households_df.maz_id.fillna(213906) - maz["hhpop"] = households_df.groupby('maz_id').persons.sum() - maz["tothh"] = households_df.groupby('maz_id').size() + maz = maz.to_frame(['taz1454']) + maz["hhpop"] = households_df.groupby('tm2_maz').persons.sum() + maz["tothh"] = households_df.groupby('tm2_maz').size() maz = add_households(maz, taz_df.tothh.sum()) maz['gq_type_univ'] = mazi['gqpopu' + mazi_yr] maz['gq_type_mil'] = mazi['gqpopm' + mazi_yr] @@ -354,18 +339,18 @@ def count_acres_with_mask(mask): @orca.step() -def taz1_growth_summary(year, initial_summary_year, final_year, run_name): +def taz1_growth_summary(year, initial_year, final_year, run_name): if year != final_year: return # use 2015 as the base year - year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/%s_taz1_summary_%d.csv" % (run_name, initial_summary_year))) + year1 = 
pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/%s_taz1_summary_%d.csv" % (run_name, initial_year)))
     year2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/%s_taz1_summary_%d.csv" % (run_name, final_year)))
 
-    taz_summary = year1.merge(year2, on='TAZ', suffixes=("_"+str(initial_summary_year), "_"+str(final_year)))
-    taz_summary = taz_summary.rename(columns={"SD_"+(str(initial_summary_year)): "SD", "COUNTY_"+(str(initial_summary_year)): "COUNTY",
-                                              "ZONE_"+(str(initial_summary_year)): "ZONE"})
+    taz_summary = year1.merge(year2, on='TAZ', suffixes=("_"+str(initial_year), "_"+str(final_year)))
+    taz_summary = taz_summary.rename(columns={"SD_"+(str(initial_year)): "SD", "COUNTY_"+(str(initial_year)): "COUNTY",
+                                          "ZONE_"+(str(initial_year)): "ZONE"})
     taz_summary = taz_summary.drop(columns=["SD_"+(str(final_year)), "COUNTY_"+(str(final_year)), "ZONE_"+(str(final_year))])
 
     taz_summary["run_name"] = run_name
@@ -375,43 +360,44 @@ def taz1_growth_summary(year, initial_summary_year, final_year, run_name):
 
     for col in columns:
         taz_summary[col+"_growth"] = (taz_summary[col+"_"+str(final_year)] -
-                                      taz_summary[col+"_"+str(initial_summary_year)])
+                                      taz_summary[col+"_"+str(initial_year)])
         # percent change in geography's households/jobs/etc.
         taz_summary[col+'_pct_change'] = (round((taz_summary[col+"_"+str(final_year)] /
-                                          taz_summary[col+"_"+str(initial_summary_year)] - 1) * 100, 2))
+                                          taz_summary[col+"_"+str(initial_year)] - 1) * 100, 2))
        # percent geography's growth of households/jobs/etc. of all regional growth in households/jobs/etc.
        taz_summary[col+'_pct_of_regional_growth'] = (round(((taz_summary[col+"_growth"]) /
                                                    (taz_summary[col+"_"+str(final_year)].sum() -
-                                                    taz_summary[col+"_"+str(initial_summary_year)].sum())) * 100, 2))
+                                                    taz_summary[col+"_"+str(initial_year)].sum())) * 100, 2))
 
-        taz_summary[col+"_"+str(initial_summary_year)+"_share"] = (round(taz_summary[col+"_"+str(initial_summary_year)] /
-                                                                   taz_summary[col+"_"+str(initial_summary_year)].sum(), 2))
+        taz_summary[col+"_"+str(initial_year)+"_share"] = (round(taz_summary[col+"_"+str(initial_year)] /
+                                                           taz_summary[col+"_"+str(initial_year)].sum(), 2))
         taz_summary[col+"_"+str(final_year)+"_share"] = (round(taz_summary[col+"_"+str(final_year)] /
                                                          taz_summary[col+"_"+str(final_year)].sum(), 2) )
        taz_summary[col+'_share_change'] = (taz_summary[col+"_"+str(final_year)+"_share"] -
-                                            taz_summary[col+"_"+str(initial_summary_year)+"_share"])
+                                            taz_summary[col+"_"+str(initial_year)+"_share"])
 
     taz_summary.fillna(0).to_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/{}_taz1_summary_growth.csv").format(run_name))
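For reference, the growth and share arithmetic used by taz1_growth_summary (and by the analogous geography and MAZ growth summaries) reduces to the following sketch; the zone ids and household counts are toy values, not model output:

    import pandas as pd

    df = pd.DataFrame({"tothh_2015": [100, 300, 600],
                       "tothh_2050": [150, 350, 700]}, index=[1, 2, 3])
    # absolute growth per zone: 50, 50, 100
    df["tothh_growth"] = df["tothh_2050"] - df["tothh_2015"]
    # percent change per zone, e.g. zone 1: (150/100 - 1) * 100 = 50.0
    df["tothh_pct_change"] = round((df["tothh_2050"] / df["tothh_2015"] - 1) * 100, 2)
    # share of regional growth: 200 new households regionwide, so 25%, 25%, 50%
    df["tothh_pct_of_regional_growth"] = round(
        df["tothh_growth"] / (df["tothh_2050"].sum() - df["tothh_2015"].sum()) * 100, 2)
    # change in each zone's share of the regional total
    df["tothh_share_change"] = (round(df["tothh_2050"] / df["tothh_2050"].sum(), 2) -
                                round(df["tothh_2015"] / df["tothh_2015"].sum(), 2))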
 
 
 @orca.step()
-def maz_marginals(parcels, households, buildings, maz, year,
-                  tm1_tm2_maz_forecast_inputs, tm1_tm2_regional_demographic_forecast, initial_summary_year,
+def maz_marginals(parcels, households, buildings, travel_model_zones, year,
+                  tm1_tm2_maz_forecast_inputs, tm1_tm2_regional_demographic_forecast, initial_year,
                   interim_summary_year, final_year, run_name):
 
-    if year not in [initial_summary_year, interim_summary_year, final_year]:
+    if year not in [initial_year, interim_summary_year, final_year]:
         return
 
     # (1) initialize maz dataframe
-    maz_m = maz.to_frame(['TAZ', 'county_name'])
+    maz_m = pd.DataFrame(index=travel_model_zones.to_frame().groupby("maz_tm2").maz_tm2.first())
+    maz_m['taz_tm2'] = travel_model_zones.to_frame().groupby("maz_tm2").taz_tm2.first()
+    maz_county = travel_model_zones.to_frame().merge(parcels.to_frame(columns=['parcel_id', 'county']), on='parcel_id')
+    maz_m["county"] = maz_county.groupby("maz_tm2").county.first()
 
     # (2) add households by MAZ
-    hh_df = orca.merge_tables('households', [parcels, buildings, households], columns=['maz_id'])
-    # apply fix to maz_id
-    hh_df.maz_id = hh_df.maz_id.fillna(213906)
-    maz_m["tothh"] = hh_df.groupby('maz_id').size()
+    hh_df = orca.merge_tables('households', [parcels, buildings, travel_model_zones, households], columns=['maz_tm2'])
+    maz_m["tothh"] = hh_df.groupby('maz_tm2').size()
     maz_m['tothh'] = maz_m.tothh.fillna(0)
     maz_m = add_households(maz_m, maz_m.tothh.sum())
@@ -438,38 +424,43 @@
 
 @orca.step()
-def maz_summary(parcels, jobs, households, buildings, maz, year, tm2_emp27_employment_shares,
-                tm1_tm2_regional_controls, initial_summary_year, interim_summary_year, final_year, run_name):
+def maz_summary(parcels, jobs, households, buildings, year, travel_model_zones, tm2_emp27_employment_shares,
+                tm1_tm2_regional_controls, initial_year, interim_summary_year, final_year, run_name):
 
-    if year not in [initial_summary_year, interim_summary_year, final_year]:
+    if year not in [initial_year, interim_summary_year, final_year]:
         return
 
     # (1) initialize maz dataframe
-    maz_df = maz.to_frame(['TAZ', 'county_name'])
+    maz_df = pd.DataFrame(index=travel_model_zones.to_frame().groupby("maz_tm2").maz_tm2.first())
+    maz_df['taz_tm2'] = travel_model_zones.to_frame().groupby("maz_tm2").taz_tm2.first()
+    parcel_df = travel_model_zones.to_frame().merge(parcels.to_frame(columns=['parcel_id', 'county', 'acres']),
+                                                    on='parcel_id')
+    maz_df["county"] = parcel_df.groupby("maz_tm2").county.first()
+    buildings_df = travel_model_zones.to_frame().merge(buildings.to_frame(columns=['parcel_id', 'residential_units']),
+                                                       on='parcel_id')
 
     # (2) get tothh from maz marginals dataframe
     maz_marginals_df = orca.get_table("maz_marginals_df").to_frame()
     maz_df['tothh'] = maz_marginals_df['tothh']
 
     # (3) summarize household data by MAZ
-    hh_df = orca.merge_tables('households', [parcels, buildings, households],
-                              columns=['persons', 'base_income_quartile', 'maz_id'])
+    hh_df = orca.merge_tables('households', [parcels, buildings, travel_model_zones, households],
+                              columns=['persons', 'base_income_quartile', 'maz_tm2'])
 
     def gethhcounts(filter):
-        return hh_df.query(filter).groupby('maz_id').size()
+        return hh_df.query(filter).groupby('maz_tm2').size()
     maz_df["hhincq1"] = gethhcounts("base_income_quartile == 1")
     maz_df["hhincq2"] = gethhcounts("base_income_quartile == 2")
     maz_df["hhincq3"] = gethhcounts("base_income_quartile == 3")
     maz_df["hhincq4"] = gethhcounts("base_income_quartile == 4")
 
     # (4) summarize jobs by MAZ
-    jobs_df = orca.merge_tables('jobs',
-                                [parcels, buildings, jobs],
-                                columns=['maz_id', 'empsix'])
+    jobs_df = orca.merge_tables('jobs', [parcels, buildings, travel_model_zones, jobs],
+                                columns=['maz_tm2', 'empsix'])
 
     # use the EMPSIX to EMP27 shares to disaggregate jobs
     tm2_emp27_employment_shares = tm2_emp27_employment_shares.to_frame()
 
     def getsectorcounts(empsix, empsh):
-        emp = jobs_df.query("empsix == '%s'" % empsix).groupby('maz_id').size()
+        emp = jobs_df.query("empsix == '%s'" % empsix).groupby('maz_tm2').size()
         return emp * tm2_emp27_employment_shares.loc[tm2_emp27_employment_shares.empsh == empsh, str(year)].values[0]
     maz_df["ag"] = getsectorcounts("AGREMPN", "ag")
     maz_df["natres"] = getsectorcounts("AGREMPN", "natres")
@@ -498,7 +489,7 @@ def 
getsectorcounts(empsix, empsh): maz_df["ret_loc"] = getsectorcounts("RETEMPN", "ret_loc") maz_df["ret_reg"] = getsectorcounts("RETEMPN", "ret_reg") maz_df["eat"] = getsectorcounts("RETEMPN", "eat") - maz_df["emp_total"] = jobs_df.groupby('maz_id').size() + maz_df["emp_total"] = jobs_df.groupby('maz_tm2').size() maz_df = maz_df.fillna(0) emp_cols = ['ag', 'natres', 'logis', 'man_bio', 'man_hvy', 'man_lgt', 'man_tech', 'transp', 'util', 'eat', 'hotel', 'ret_loc', @@ -509,17 +500,15 @@ def getsectorcounts(empsix, empsh): maz_df.loc[i, emp_cols] = round_series_match_target(r[emp_cols], r.emp_total, 0) # (5) add population - maz_df["hhpop"] = hh_df.groupby('maz_id').persons.sum() + maz_df["hhpop"] = hh_df.groupby('maz_tm2').persons.sum() # use marginals dataframe to get gqpop maz_df['gqpop'] = maz_marginals_df['gq_tot_pop'] maz_df = add_population_tm2(maz_df, year, tm1_tm2_regional_controls.to_frame()) maz_df['pop'] = maz_df.gqpop + maz_df.hhpop # (6) add density variables - pcl_df = parcels.to_frame(['maz_id', 'acres']) - bldg_df = orca.merge_tables('buildings', [buildings, parcels], columns=['maz_id', 'residential_units']) - maz_df['ACRES'] = pcl_df.groupby('maz_id').acres.sum() - maz_df['residential_units'] = bldg_df.groupby('maz_id').residential_units.sum() + maz_df['ACRES'] = parcel_df.groupby('maz_tm2').acres.sum() + maz_df['residential_units'] = buildings_df.groupby('maz_tm2').residential_units.sum() maz_df['DUDen'] = maz_df.residential_units / maz_df.ACRES maz_df['EmpDen'] = maz_df.emp_total / maz_df.ACRES maz_df['RetEmp'] = maz_df.hotel + maz_df.ret_loc + maz_df.ret_reg + maz_df.eat @@ -532,18 +521,18 @@ def getsectorcounts(empsix, empsh): @orca.step() -def maz_growth_summary(year, initial_summary_year, final_year, run_name): +def maz_growth_summary(year, initial_year, final_year, run_name): if year != final_year: return # use 2015 as the base year - year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/%s_maz_summary_%d.csv" % (run_name, initial_summary_year))) + year1 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/%s_maz_summary_%d.csv" % (run_name, initial_year))) year2 = pd.read_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/%s_maz_summary_%d.csv" % (run_name, final_year))) - maz_summary = year1.merge(year2, on='MAZ', suffixes=("_"+str(initial_summary_year), "_"+str(final_year))) - maz_summary = maz_summary.rename(columns={"TAZ_"+(str(initial_summary_year)): "TAZ", "county_name_"+(str(initial_summary_year)): "county_name"}) - maz_summary = maz_summary.drop(columns=["TAZ_"+(str(final_year)), "county_name_"+(str(final_year))]) + maz_summary = year1.merge(year2, on='MAZ', suffixes=("_"+str(initial_year), "_"+str(final_year))) + maz_summary = maz_summary.rename(columns={"TAZ_"+(str(initial_year)): "TAZ", "county_"+(str(initial_year)): "county"}) + maz_summary = maz_summary.drop(columns=["TAZ_"+(str(final_year)), "county_"+(str(final_year))]) maz_summary["run_name"] = run_name @@ -551,23 +540,23 @@ def maz_growth_summary(year, initial_summary_year, final_year, run_name): for col in columns: - maz_summary[col+'_growth'] = maz_summary[col+"_"+str(final_year)] - maz_summary[col+"_"+str(initial_summary_year)] + maz_summary[col+'_growth'] = maz_summary[col+"_"+str(final_year)] - maz_summary[col+"_"+str(initial_year)] - maz_summary[col+"_"+str(initial_summary_year)+"_share"] = (round(maz_summary[col+"_"+str(initial_summary_year)] / - 
maz_summary[col+"_"+str(initial_summary_year)].sum(), 2)) + maz_summary[col+"_"+str(initial_year)+"_share"] = (round(maz_summary[col+"_"+str(initial_year)] / + maz_summary[col+"_"+str(initial_year)].sum(), 2)) maz_summary[col+"_"+str(final_year)+"_share"] = (round(maz_summary[col+"_"+str(final_year)] / maz_summary[col+"_"+str(final_year)].sum(), 2)) maz_summary[col+'_share_change'] = (maz_summary[col+"_"+str(final_year)+"_share"] - - maz_summary[col+"_"+str(initial_summary_year)+"_share"]) + maz_summary[col+"_"+str(initial_year)+"_share"]) maz_summary.fillna(0).to_csv(os.path.join(orca.get_injectable("outputs_dir"), "travel_model_summaries/{}_maz_summary_growth.csv").format(run_name)) @orca.step() def taz2_marginals(tm2_taz2_forecast_inputs, tm1_tm2_regional_demographic_forecast, tm1_tm2_regional_controls, - year, initial_summary_year, interim_summary_year, final_year, run_name): + year, initial_year, interim_summary_year, final_year, run_name): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return # (1) bring in taz2 dataframe @@ -576,18 +565,18 @@ def taz2_marginals(tm2_taz2_forecast_inputs, tm1_tm2_regional_demographic_foreca # (2) summarize maz vars for household income and population maz_summary_df = orca.get_table("maz_summary_df").to_frame() - taz2['county_name'] = maz_summary_df.groupby('TAZ').county_name.first() - taz2['tothh'] = maz_summary_df.groupby('TAZ').tothh.sum() - taz2['hh_inc_30'] = maz_summary_df.groupby('TAZ').hhincq1.sum().fillna(0) - taz2['hh_inc_30_60'] = maz_summary_df.groupby('TAZ').hhincq2.sum().fillna(0) - taz2['hh_inc_60_100'] = maz_summary_df.groupby('TAZ').hhincq3.sum().fillna(0) - taz2['hh_inc_100_plus'] = maz_summary_df.groupby('TAZ').hhincq4.sum().fillna(0) - taz2['hhpop'] = maz_summary_df.groupby('TAZ').hhpop.sum() + taz2['county'] = maz_summary_df.groupby('taz_tm2').county.first() + taz2['tothh'] = maz_summary_df.groupby('taz_tm2').tothh.sum() + taz2['hh_inc_30'] = maz_summary_df.groupby('taz_tm2').hhincq1.sum().fillna(0) + taz2['hh_inc_30_60'] = maz_summary_df.groupby('taz_tm2').hhincq2.sum().fillna(0) + taz2['hh_inc_60_100'] = maz_summary_df.groupby('taz_tm2').hhincq3.sum().fillna(0) + taz2['hh_inc_100_plus'] = maz_summary_df.groupby('taz_tm2').hhincq4.sum().fillna(0) + taz2['hhpop'] = maz_summary_df.groupby('taz_tm2').hhpop.sum() maz_marginals_df = orca.get_table("maz_marginals_df").to_frame() - taz2['pop_hhsize1'] = maz_marginals_df.groupby('TAZ').hh_size_1.sum() - taz2['pop_hhsize2'] = maz_marginals_df.groupby('TAZ').hh_size_2.sum() * 2 - taz2['pop_hhsize3'] = maz_marginals_df.groupby('TAZ').hh_size_3.sum() * 3 - taz2['pop_hhsize4'] = (maz_marginals_df.groupby('TAZ').hh_size_4_plus.sum() * 4.781329).round(0) + taz2['pop_hhsize1'] = maz_marginals_df.groupby('taz_tm2').hh_size_1.sum() + taz2['pop_hhsize2'] = maz_marginals_df.groupby('taz_tm2').hh_size_2.sum() * 2 + taz2['pop_hhsize3'] = maz_marginals_df.groupby('taz_tm2').hh_size_3.sum() * 3 + taz2['pop_hhsize4'] = (maz_marginals_df.groupby('taz_tm2').hh_size_4_plus.sum() * 4.781329).round(0) taz2['pop'] = taz2.pop_hhsize1 + taz2.pop_hhsize2 + taz2.pop_hhsize3 + taz2.pop_hhsize4 # (3a) add person age, household workers, and presence of children using taz2 forecast inputs @@ -615,29 +604,29 @@ def taz2_marginals(tm2_taz2_forecast_inputs, tm1_tm2_regional_demographic_foreca @orca.step() -def county_marginals(tm2_occupation_shares, year, initial_summary_year, +def county_marginals(tm2_occupation_shares, year, 
initial_year, interim_summary_year, final_year, run_name): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return maz = orca.get_table("maz_summary_df").to_frame() taz2 = orca.get_table("taz2_summary_df").to_frame() # (1) initialize county dataframe - county = pd.DataFrame(index=maz.county_name.unique()) + county = pd.DataFrame(index=maz.county.unique()) # (2) add population - county['gqpop'] = maz.groupby('county_name').gqpop.sum() - county['pop'] = maz.groupby('county_name').pop.sum() + county['gqpop'] = maz.groupby('county').gqpop.sum() + county['pop'] = maz.groupby('county').pop.sum() # (3) add occupations - county[['hh_wrks_1', 'hh_wrks_2', 'hh_wrks_3_plus']] = taz2.groupby('county_name').agg({'hh_wrks_1': 'sum', - 'hh_wrks_2': 'sum', - 'hh_wrks_3_plus': 'sum'}) + county[['hh_wrks_1', 'hh_wrks_2', 'hh_wrks_3_plus']] = taz2.groupby('county').agg({'hh_wrks_1': 'sum', + 'hh_wrks_2': 'sum', + 'hh_wrks_3_plus': 'sum'}) county['workers'] = (county.hh_wrks_1 + county.hh_wrks_2 * 2 + county.hh_wrks_3_plus * 3.474036).round(0) cef = tm2_occupation_shares.to_frame() - cef = cef.loc[cef.year == year].set_index('county_name') + cef = cef.loc[cef.year == year].set_index('county') county['pers_occ_management'] = county.workers * cef.shr_occ_management county['pers_occ_management'] = round_series_match_target(county['pers_occ_management'], np.round(county['pers_occ_management'].sum()), 0) county['pers_occ_professional'] = county.workers * cef.shr_occ_professional @@ -655,9 +644,9 @@ def county_marginals(tm2_occupation_shares, year, initial_summary_year, @orca.step() -def region_marginals(year, initial_summary_year, interim_summary_year, final_year, run_name): +def region_marginals(year, initial_year, interim_summary_year, final_year, run_name): - if year not in [initial_summary_year, interim_summary_year, final_year]: + if year not in [initial_year, interim_summary_year, final_year]: return # (1) get group quarters from MAZ summaries diff --git a/baus/tests/validation.py b/baus/tests/validation.py index be49b4931..118ed6ce7 100644 --- a/baus/tests/validation.py +++ b/baus/tests/validation.py @@ -29,19 +29,19 @@ def check_household_controls(households, household_controls, year): assert_series_equal( current_household_controls, - households.base_income_quartile.value_counts() + households.hh_inc_cat1.value_counts() ) # make sure the employment controls are currently being matched -def check_job_controls(jobs, employment_controls, year, mapping): +def check_job_controls(jobs, employment_controls, year, developer_settings): print("Check job controls") current_employment_controls = employment_controls.local.loc[year] current_employment_controls = current_employment_controls.\ set_index("empsix_id").number_of_jobs - empsix_map = mapping["empsix_name_to_id"] - current_counts = jobs.empsix.map(empsix_map).value_counts() + empsix_map = developer_settings["empsix_name_to_id"] + current_counts = jobs.emp6_cat.map(empsix_map).value_counts() assert_series_equal( current_employment_controls, @@ -49,28 +49,6 @@ def check_job_controls(jobs, employment_controls, year, mapping): ) -def check_residential_units(residential_units, buildings): - print("Check residential units") - - # assert we fanned out the residential units correctly - assert len(residential_units) == buildings.residential_units.sum() - - # make sure the unit counts per building add up - assert_series_equal( - buildings.residential_units[ - 
buildings.residential_units > 0].sort_index(),
-        residential_units.building_id.value_counts().sort_index()
-    )
-
-    # make sure we moved deed restricted units to the res units table correctly
-    assert_series_equal(
-        buildings.deed_restricted_units[
-            buildings.residential_units > 0].sort_index(),
-        residential_units.deed_restricted.groupby(
-            residential_units.building_id).sum().sort_index()
-    )
-
-
 # make sure everyone gets a house - this might not exist in the real world,
 # but due to the nature of control totals it exists here
 def check_no_unplaced_households(households, year):
@@ -91,28 +69,18 @@ def check_no_overfull_buildings(buildings):
 #    assert True not in (buildings.vacant_job_spaces < 0).value_counts()
 
 
-# households have both unit ids and building ids - make sure they're in sync
-def check_unit_ids_match_building_ids(households, residential_units):
-    print("Check unit ids and building ids match")
-    building_ids = misc.reindex(
-        residential_units.building_id, households.unit_id)
-    assert_series_equal(building_ids, households.building_id, 25000)
-
-
 @orca.step()
 def simulation_validation(buildings, households, jobs, residential_units, year,
-                          household_controls, employment_controls, mapping):
+                          household_controls, employment_controls, developer_settings):
 
-    check_job_controls(jobs, employment_controls, year, mapping)
+    check_job_controls(jobs, employment_controls, year, developer_settings)
 
     check_household_controls(households, household_controls, year)
 
-    check_residential_units(residential_units, buildings)
-
     check_no_unplaced_households(households, year)
 
     check_no_unplaced_jobs(jobs, year)
 
-# check_no_overfull_buildings(households, buildings)
+    check_no_overfull_buildings(buildings)
 
-    check_unit_ids_match_building_ids(households, residential_units)
\ No newline at end of file
+    return
\ No newline at end of file
diff --git a/baus/variables.py b/baus/variables.py
index ec09ca521..e78d5d059 100644
--- a/baus/variables.py
+++ b/baus/variables.py
@@ -84,7 +84,22 @@ def naics(jobs):
 
 @orca.column('jobs', cache=True)
 def empsix_id(jobs):
-    return jobs.empsix
+    return jobs.emp6_cat
+
+
+@orca.column('jobs', cache=True)
+def empsix(jobs):
+    return jobs.emp6_cat
+
+
+#####################
+# HOUSEHOLDS VARIABLES
+#####################
+
+
+@orca.column('households', cache=True)
+def base_income_quartile(households):
+    return households.hh_inc_cat1
 
 
 #############################
@@ -331,10 +346,6 @@ def retail_ratio(nodes):
 # PARCELS VARIABLES
 #####################
 
-@orca.column('parcels')
-def maz_id(parcels, travel_model_zones):
-    return travel_model_zones.maz_tm2.reindex(parcels.index)
-
 
 @orca.column("parcels")
 def residential_sales_price_sqft(parcel_sales_price_sqft_func):
@@ -421,87 +432,65 @@ def fees_per_sqft(parcels, run_setup):
 
 
 @orca.column('parcels', cache=True)
-def pda_id(parcels, parcels_geography):
-    return parcels_geography.pda_id.reindex(parcels.index)
-
-
-@orca.column('parcels', cache=True)
-def cat_id(parcels, parcels_geography):
-    return parcels_geography.cat_id.reindex(parcels.index)
-
-
-@orca.column('parcels', cache=True)
-def tra_id(parcels, parcels_geography):
-    return parcels_geography.tra_id.reindex(parcels.index)
-
-
-@orca.column('parcels', cache=True)
-def juris_tra(parcels, parcels_geography):
-    return parcels_geography.juris_tra.reindex(parcels.index)
+def pda_id(growth_geographies):
+    return growth_geographies.pda_id
 
 
 @orca.column('parcels', cache=True)
-def sesit_id(parcels, parcels_geography):
-    return 
parcels_geography.sesit_id.reindex(parcels.index) +def cat_id(growth_geographies): + return growth_geographies.cat_id @orca.column('parcels', cache=True) -def juris_sesit(parcels, parcels_geography): - return parcels_geography.juris_sesit.reindex(parcels.index) +def tra_id(growth_geographies): + return growth_geographies.tra_id @orca.column('parcels', cache=True) -def ppa_id(parcels, parcels_geography): - return parcels_geography.ppa_id.reindex(parcels.index) +def juris_tra(growth_geographies): + return growth_geographies.juris_tra @orca.column('parcels', cache=True) -def juris_ppa(parcels, parcels_geography): - return parcels_geography.juris_ppa.reindex(parcels.index) +def sesit_id(growth_geographies): + return growth_geographies.sesit_id @orca.column('parcels', cache=True) -def coc_id(parcels, parcels_geography): - return parcels_geography.coc_id.reindex(parcels.index) +def juris_sesit(growth_geographies): + return growth_geographies.juris_sesit @orca.column('parcels', cache=True) -def juris_coc(parcels, parcels_geography): - return parcels_geography.juris_coc.reindex(parcels.index) +def ppa_id(growth_geographies): + return growth_geographies.ppa_id @orca.column('parcels', cache=True) -def superdistrict(parcels, travel_model_zones): - return misc.reindex(travel_model_zones.superdistrict, parcels.parcel_id) +def juris_ppa(growth_geographies): + return growth_geographies.juris_ppa @orca.column('parcels', cache=True) -def subregion(parcels, taz_geography): - return misc.reindex(taz_geography.subregion, parcels.zone_id) +def coc_id(growth_geographies): + return growth_geographies.coc_id -# perffoot is a dummy indicating the FOOTprint for the PERFormance targets @orca.column('parcels', cache=True) -def urban_footprint(parcels, parcels_geography): - return parcels_geography.perffoot.reindex(parcels.index) +def juris_coc(growth_geographies): + return growth_geographies.juris_coc -# perfzone is a dummy for geography for a performance target @orca.column('parcels', cache=True) -def performance_zone(parcels, parcels_geography): - return parcels_geography.perfarea.reindex(parcels.index) +def superdistrict(travel_model_zones): + return travel_model_zones.superdistrict # urbanized is a dummy for urbanized area, sourced from shapefile at: # M:\urban_modeling\data\LandUse\landuse_raw\urban_footprint_2009 @orca.column('parcels', cache=True) -def urbanized(parcels, parcels_geography): - return parcels_geography.urbanized.reindex(parcels.index) - - -@orca.column('parcels', cache=True) -def juris(parcels, parcels_geography): - return parcels_geography.juris_name +def urbanized(parcels, growth_geographies): + return growth_geographies.urbanized.reindex(parcels.index) @orca.column('parcels', cache=True) @@ -777,11 +766,6 @@ def land_cost(parcels): return parcels.building_purchase_price + parcels.parcel_size * s -@orca.column('parcels', cache=True) -def county(parcels): - return parcels.county - - @orca.column('parcels', cache=True) def cost_shifters(parcels, cost_shifters): return parcels.county.map(cost_shifters["cost_shifters"]) @@ -809,8 +793,8 @@ def tmnode_id(parcels, net): @orca.column('parcels', cache=True) -def subregion(travel_model_zones, parcels): - return misc.reindex(travel_model_zones.subregion, parcels.parcel_id) +def subregion(travel_model_zones): + return travel_model_zones.subregion @orca.column('parcels', cache=True) @@ -924,9 +908,10 @@ def ave_unit_sqft(buildings): return buildings.sqft_per_unit.groupby(buildings.zone_id).quantile(.6) -GROSS_AVE_UNIT_SIZE = 1000.0 -PARCEL_USE_EFFICIENCY 
= .8 -HEIGHT_PER_STORY = 12.0 +@orca.column('parcels') +def shape_area(parcels): + return parcels.acres + ################################### # Zoning Capacity Variables @@ -947,13 +932,7 @@ def zoned_du_vacant(parcels, parcels_zoning_calculations): @orca.column('parcels_zoning_calculations', cache=True) def effective_max_dua(zoning_existing, parcels): - max_dua_from_far = zoning_existing.max_far * 43560 / GROSS_AVE_UNIT_SIZE - - max_far_from_height = (zoning_existing.max_height / HEIGHT_PER_STORY) * PARCEL_USE_EFFICIENCY - - max_dua_from_height = max_far_from_height * 43560 / GROSS_AVE_UNIT_SIZE - - s = pd.concat([zoning_existing.max_dua, max_dua_from_far, max_dua_from_height], axis=1).min(axis=1) + s = zoning_existing.max_dua # take the max dua IFF the upzone value is greater than the current value # i.e. don't let the upzoning operation accidentally downzone @@ -967,10 +946,7 @@ def effective_max_dua(zoning_existing, parcels): strategy_min_dua = orca.get_table("zoning_strategy").dua_down - s = pd.concat([ - s, - strategy_min_dua - ], axis=1).min(axis=1) + s = pd.concat([s, strategy_min_dua], axis=1).min(axis=1) s3 = parcel_is_allowed('residential') @@ -980,9 +956,7 @@ def effective_max_dua(zoning_existing, parcels): @orca.column('parcels_zoning_calculations', cache=True) def effective_max_far(zoning_existing, parcels): - max_far_from_height = (zoning_existing.max_height / HEIGHT_PER_STORY) * PARCEL_USE_EFFICIENCY - - s = pd.concat([zoning_existing.max_far, max_far_from_height], axis=1).min(axis=1) + s = zoning_existing.max_far # take the max far IFF the upzone value is greater than the current value # i.e. don't let the upzoning operation accidentally downzone @@ -1020,7 +994,7 @@ def zoned_du_underbuild(parcels, parcels_zoning_calculations): s = (parcels_zoning_calculations.zoned_du - parcels.total_residential_units - parcels.total_non_residential_sqft / - GROSS_AVE_UNIT_SIZE).clip(lower=0) + 1000).clip(lower=0) # gross ave unit size hardcoded as 1000 ratio = (s / parcels.total_residential_units).replace(np.inf, 1) # if the ratio of additional units to existing units is not at least .5 # we don't build it - I mean we're not turning a 10 story building into an diff --git a/configs/developer/developer_settings.yaml b/configs/developer/developer_settings.yaml index 2b8f3c62a..44c91f7ef 100644 --- a/configs/developer/developer_settings.yaml +++ b/configs/developer/developer_settings.yaml @@ -99,7 +99,6 @@ building_type_map: # in the building table - in the long run, the developer # forms and the building types should be the same and the # developer model should account for the differences. -# travel_model_ variables are for the travel_model_summary step. 
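+# form_to_btype maps each developer model form to the building types that
+# form can be built as on a parcel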
form_to_btype: residential: - HS @@ -179,5 +178,5 @@ empsix_name_to_id: OTHEMPN: 6 -# convert square meters to square feet -parcel_size_factor: 10.764 \ No newline at end of file +# convert acres to square feet +parcel_size_factor: 43560 \ No newline at end of file From a83f3c06346bc5845d2e903c9b5fd301da0a8f3a Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 31 Oct 2023 10:48:12 -0700 Subject: [PATCH 46/49] tm summary fix --- baus/summaries/travel_model_summaries.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/baus/summaries/travel_model_summaries.py b/baus/summaries/travel_model_summaries.py index 4593fe51a..7ebc876ee 100644 --- a/baus/summaries/travel_model_summaries.py +++ b/baus/summaries/travel_model_summaries.py @@ -309,23 +309,25 @@ def getsectorcounts(sector): # (9) use maz forecast inputs to forecast group quarters mazi = tm1_tm2_maz_forecast_inputs.to_frame() mazi_yr = str(year)[2:] - maz = maz.to_frame(['taz1454']) - maz["hhpop"] = households_df.groupby('tm2_maz').persons.sum() - maz["tothh"] = households_df.groupby('tm2_maz').size() + maz = pd.DataFrame(index=travel_model_zones.to_frame().groupby("maz_tm2").maz_tm2.first()) + maz['taz_tm1'] = travel_model_zones.to_frame().groupby("maz_tm2").taz_tm1.first() + maz["hhpop"] = households_df.groupby('maz_tm2').persons.sum() + maz["tothh"] = households_df.groupby('maz_tm2').size() maz = add_households(maz, taz_df.tothh.sum()) maz['gq_type_univ'] = mazi['gqpopu' + mazi_yr] maz['gq_type_mil'] = mazi['gqpopm' + mazi_yr] maz['gq_type_othnon'] = mazi['gqpopo' + mazi_yr] maz['gq_tot_pop'] = maz['gq_type_univ'] + maz['gq_type_mil'] + maz['gq_type_othnon'] - taz_df['gq_type_univ'] = maz.groupby('taz1454').gq_type_univ.sum().fillna(0) - taz_df['gq_type_mil'] = maz.groupby('taz1454').gq_type_mil.sum().fillna(0) - taz_df['gq_type_othnon'] = maz.groupby('taz1454').gq_type_othnon.sum().fillna(0) - taz_df['gq_tot_pop'] = maz.groupby('taz1454').gq_tot_pop.sum().fillna(0) + taz_df['gq_type_univ'] = maz.groupby('taz_tm1').gq_type_univ.sum().fillna(0) + taz_df['gq_type_mil'] = maz.groupby('taz_tm1').gq_type_mil.sum().fillna(0) + taz_df['gq_type_othnon'] = maz.groupby('taz_tm1').gq_type_othnon.sum().fillna(0) + taz_df['gq_tot_pop'] = maz.groupby('taz_tm1').gq_tot_pop.sum().fillna(0) # (10) add acreage variables def count_acres_with_mask(mask): - mask *= parcels.acres - return mask.groupby(parcels.zone_id).sum() + parcels_df = parcels.to_frame().merge(travel_model_zones.to_frame(), on='parcel_id') + mask *= parcels_df.acres + return mask.groupby(parcels_df.taz_tm1).sum() f = orca.get_injectable('parcel_first_building_type_is') taz_df["resacre_unweighted"] = count_acres_with_mask(f('residential') | f('mixedresidential')) taz_df["ciacre_unweighted"] = count_acres_with_mask(f('select_non_residential')) From 693e12792268a37d246497b309d3909b0edca84a Mon Sep 17 00:00:00 2001 From: Elizabeth Theocharides Date: Tue, 31 Oct 2023 10:49:09 -0700 Subject: [PATCH 47/49] add slr year argument --- baus/slr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baus/slr.py b/baus/slr.py index 25a813014..a14ab661f 100644 --- a/baus/slr.py +++ b/baus/slr.py @@ -10,7 +10,7 @@ @orca.step() -def slr_inundate(slr_progression, parcels, run_setup): +def slr_inundate(slr_progression, parcels, run_setup, year): # inundated parcels are all parcels at or below the SLR progression level in that year slr_progression = slr_progression.to_frame() From a6d0ad4357bacbfa0e3f15a051bee8644cff6fb2 Mon Sep 17 00:00:00 
2001
From: Elizabeth Theocharides
Date: Tue, 31 Oct 2023 12:08:52 -0700
Subject: [PATCH 48/49] institutions updates

---
 baus/datasources.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/baus/datasources.py b/baus/datasources.py
index f8f151a94..da1ee7271 100644
--- a/baus/datasources.py
+++ b/baus/datasources.py
@@ -659,8 +659,16 @@ def accessory_units():
 
 @orca.table(cache=True)
 def nodev_sites():
-    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites.csv"), index_col="parcel_id")
-    return df.set_index("parcel_id")
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/nodev_sites.csv"),
+                     index_col="parcel_id")
+    return df
+
+
+@orca.table(cache=True)
+def institutions():
+    df = pd.read_csv(os.path.join(orca.get_injectable("inputs_dir"), "basis_inputs/parcels_buildings_agents/institutions.csv"),
+                     index_col="parcel_id")
+    return df
 
 
 # parcels-tract crosswalk that match the Urban Displacement Project census tract vintage
@@ -704,4 +712,4 @@ def base_year_summary_taz():
 orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
 orca.broadcast('growth_geographies', 'buildings', cast_index=True, onto_on='parcel_id')
 orca.broadcast('travel_model_zones', 'buildings', cast_index=True, onto_on='parcel_id')
-orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
\ No newline at end of file
+orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')

From d1d1d2977ca047c5a85695d306893736ebf32d65 Mon Sep 17 00:00:00 2001
From: Elizabeth Theocharides
Date: Tue, 31 Oct 2023 12:09:06 -0700
Subject: [PATCH 49/49] nodev updates

---
 baus/variables.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/baus/variables.py b/baus/variables.py
index e78d5d059..3d2e72963 100644
--- a/baus/variables.py
+++ b/baus/variables.py
@@ -623,21 +623,23 @@ def total_non_residential_sqft(parcels, buildings):
 
 def nodev(parcels, nodev_sites, static_parcels):
     # start with nodev parcels: parcels where development is off-limits
     # the input table tells us what category of nodev the various entries are:
-    # protected open space, small single-family lots, etc. 
+    # protected open space, small single-family lots, etc.
+    nodev_sites = nodev_sites.to_frame()
+    nd = nodev_sites[nodev_sites["no_dev"] == 1].index.values
     # then add all static parcels: a subset of nodev parcels where
     # jobs and households don't relocate, including:
     # institutions (where job growth is handled separately) and sea level rise parcels
-    nd.append(static_parcels)
+    nd = np.append(nd, static_parcels)
     # development projects and buildings less than 20 years old also become off limits in developer_settings.yaml
-    return nd.reindex(parcels.index)
+    return pd.Series(parcels.index.isin(nd), index=parcels.index)
 
 
 @orca.injectable()
-def static_parcels(parcels, nodev_sites):
+def static_parcels(parcels, institutions):
     # start with institutions
     # these are parcels where households and jobs don't move
-    static_parcels = nodev_sites[nodev_sites.institutions_flag == 1].index.values
+    institutions = institutions.to_frame()
+    static_parcels = institutions.index.values
     # add sea level rise parcels
     parcels = parcels.to_frame()
-    static_parcels.append(parcels[parcels.slr_nodev ==1].index.values)
+    static_parcels = np.append(static_parcels, parcels[parcels.slr_nodev == 1].index.values)
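+    # note: np.append returns a new array rather than extending in place
+    # (ndarray has no list-style .append), which is why both unions above are
+    # reassigned; e.g. np.append(np.array([1, 2]), [3]) evaluates to array([1, 2, 3])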