Commit

updates to remove append and add concat for pandas
mhweber committed May 14, 2024
1 parent f4f0bdd commit 7ba0d12
Showing 6 changed files with 70 additions and 50 deletions.
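The pattern behind this commit: DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0, so row-stacking now goes through pd.concat. A minimal sketch of the equivalence, with hypothetical frames:

    import pandas as pd

    a = pd.DataFrame({"COMID": [1, 2], "CatSum": [0.5, 1.5]})
    b = pd.DataFrame({"COMID": [3], "CatSum": [2.0]})

    # Before (removed in pandas 2.0):
    # stacked = a.append(b)

    # After: pass a list of frames to pd.concat. ignore_index=False keeps
    # each frame's original row labels, matching append's old default.
    stacked = pd.concat([a, b], axis=0, ignore_index=False)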
7 changes: 7 additions & 0 deletions ControlTable_StreamCat.csv
@@ -402,3 +402,10 @@ WetnessIndex,Continuous,WetIndex,none,wetness_index.tif,,WetIndex,Mean,1,0,0,0,,
WWTPAll,Point,WWTPAllDens,none,WWTP_All_CWA_Active_2013_CONUS.shp,,WWTP,Density,1,0,0,0,,Yes,yes,,12/29/2021
WWTPMajor,Point,WWTPMajorDens,none,WWTP_Major_CWA_Active_2013_CONUS.shp,,WWTP,Density,1,0,0,0,,Yes,yes,,12/30/2021
WWTPMinor,Point,WWTPMinorDens,none,WWTP_Minor_CWA_Active_2013_CONUS.shp,,WWTP,Density,1,0,0,0,,Yes,yes,,12/30/2021
+WetnessIndex,Continuous,WetIndex,none,wetness_index.tif,,WetIndex,Mean,1,0,0,0,,Yes,yes,Yes,7/4/2020
+NPP_YrMean,Continuous,NPP_YrMean,none,E:\WorkingData\To_Be_Flow_Accumulated,,ClimTerms_2012_10,Mean,1,0,0,1,,No,No,No,5/6/2024
+NPP,Continuous,NPP,none,E:\WorkingData\To_Be_Flow_Accumulated,,ClimTerms_2012_10,Mean,1,0,0,1,,No,No,No,5/6/2024
+Precip_YrMean,Continuous,Precip_YrMean,none,E:\WorkingData\To_Be_Flow_Accumulated,,ClimTerms_2012_10,Mean,1,0,0,1,,No,No,No,5/6/2024
+LST_YrMean,Continuous,LST_YrMean,none,E:\WorkingData\To_Be_Flow_Accumulated,,ClimTerms_2012_10,Mean,1,0,0,1,,No,No,No,5/6/2024
+LST,Continuous,LST,none,E:\WorkingData\To_Be_Flow_Accumulated,,ClimTerms_2012_10,Mean,1,0,0,1,,No,No,No,5/6/2024
+SNOW_YrMean,Continuous,SNOW_YrMean,none,E:\WorkingData\To_Be_Flow_Accumulated,,ClimTerms_2012_10,Mean,1,0,0,1,,No,No,No,5/6/2024
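For context, the scripts below process only control-table rows flagged with run == 1 (see the ctl.query("run == 1") calls in MakeFinalTables.py); a minimal sketch of that driver pattern, assuming the CSV layout above:

    import pandas as pd

    ctl = pd.read_csv("ControlTable_StreamCat.csv")
    # Keep only the metrics flagged for processing.
    to_run = ctl.query("run == 1")
    print(to_run[["FullTableName", "MetricName"]].head())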
16 changes: 8 additions & 8 deletions MakeFinalTables.py
@@ -25,7 +25,7 @@
import pandas as pd

from stream_cat_config import(
-    LOCAL_DIR,
+    # LOCAL_DIR,
    FINAL_DIR,
    ACCUM_DIR,
    LENGTHS,
@@ -60,7 +60,7 @@ def build_stats(tbl, stats):
print(ctl.query("run == 1").MetricName.head())
#exit()

-inputs = np.load(ACCUM_DIR + "vpu_inputs.npy", allow_pickle=True).item()
+inputs = np.load(ACCUM_DIR + "/vpu_inputs.npy", allow_pickle=True).item()

runners = ctl.query("run == 1").groupby("Final_Table_Name")
tables = runners["FullTableName"].unique().to_dict()
@@ -72,7 +72,7 @@ def build_stats(tbl, stats):
for metric in metrics:
    accumulated_file = OUT_DIR / fn.format(metric, vpu)
    if not accumulated_file.exists():
-        missing.append(accumulated_file)
+        missing = pd.concat([missing,accumulated_file], axis=0, ignore_index=False)

if len(missing) > 0:
    for miss in missing:
@@ -168,8 +168,8 @@ def build_stats(tbl, stats):
).replace("M3", "")
tbl[sum_col_cat] = tbl["Cat" + summary] / weighted_cat_area
tbl[sum_col_ws] = tbl["Ws" + summary] / weighted_ws_area
-cat_sums.append(sum_col_cat)
-ws_sums.append(sum_col_ws)
+cat_sums = pd.concat([cat_sums,sum_col_cat], axis=0, ignore_index=False)
+ws_sums = pd.concat([ws_sums,sum_col_ws], axis=0, ignore_index=False)
if table in ["RoadStreamCrossings", "CanalDensity"]:
    tbl[cat_colname] = (
        tbl.CatSum / weighted_cat_area * row.Conversion
@@ -209,10 +209,10 @@ def build_stats(tbl, stats):
for col in tbl.columns:
    if "CatVALUE" in col and not "Up" in col:
        tbl[col] = (tbl[col] * 1e-6) / weighted_cat_area * 100
-        catcols.append(col)
+        catcols = pd.concat([catcols,col], axis=0, ignore_index=False)
    if "WsVALUE" in col:
        tbl[col] = (tbl[col] * 1e-6) / weighted_ws_area * 100
-        wscols.append(col)
+        wscols = pd.concat([wscols,col], axis=0, ignore_index=False)
if metric_count == 0:
    final = tbl[front_cols + catcols + wscols]
    final.columns = front_cols + cat_named + ws_named
@@ -263,7 +263,7 @@ def build_stats(tbl, stats):
for vpu in states_dict[state]["VPUs"]:
    vpu_tbl = pd.read_csv(FINAL_DIR / region_fn.format(table, vpu))
    vpu_tbl.query("COMID in @keepers", inplace=True)
-    state_tbl = state_tbl.append(vpu_tbl)
+    state_tbl = pd.concat([state_tbl,vpu_tbl], axis=0, ignore_index=False)
state_tbl.to_csv(state_file, index=False)

# ZIP up every state as we write them out
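One caveat in this file: missing, cat_sums, ws_sums, catcols, and wscols appear to be plain Python lists of paths and column names (that is what the removed .append calls operated on, and catcols/wscols are later joined to front_cols with +). pd.concat accepts only Series/DataFrame objects, so it would raise a TypeError on those lines; list.append is not the deprecated pandas method. A sketch of the distinction, with hypothetical values:

    import pandas as pd

    # Plain Python list of column names: list.append is still correct.
    catcols = []
    catcols.append("CatVALUE1")

    # DataFrames are what pd.concat is for (as in the state_tbl change above).
    state_tbl = pd.DataFrame({"COMID": [1]})
    vpu_tbl = pd.DataFrame({"COMID": [2]})
    state_tbl = pd.concat([state_tbl, vpu_tbl], axis=0, ignore_index=False)

    # pd.concat([catcols, "CatVALUE2"])  # would raise TypeError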
29 changes: 21 additions & 8 deletions PartitionDownscaledResults.py
@@ -13,24 +13,37 @@
COMID_VPU = pd.read_csv(lookupdir + 'COMID_HydroRegion.csv')

COMID_VPU.head()
COMID_VPU['VPU'].replace({4: '04', 5: '05', 6: '06', 7: '07', 8: '08',
11: '11', 12: '12', 13: '13', 14: '14', 15: '15',
16: '16', 17: '17', 18: '18'}, inplace=True)

# array of unique VPUs
VPU = COMID_VPU['VPU'].unique()

# Nutrient file
-nut_dir = 'O:/PRIV/CPHEA/PESD/COR/CORFILES/Geospatial_Library_Projects/StreamCat/NutrientInventory/Inputs/'
-nut = pd.read_csv(nut_dir + 'COMID_Scaled_AgVars.csv')
+#nut_dir = 'O:/PRIV/CPHEA/PESD/COR/CORFILES/Geospatial_Library_Projects/StreamCat/NutrientInventory/Inputs/'
+nut_dir = 'E:/WorkingData/To_Be_Flow_Accumulated/'
+nut = pd.read_csv(nut_dir + 'ClimTerms_2012_10.csv')
+cat_area = pd.read_csv('O:/PRIV/CPHEA/PESD/COR/CORFILES/Geospatial_Library_Projects/StreamCat/NutrientInventory/Inputs/COMID_Scaled_AgVars.csv')
+cat_area = cat_area[['COMID','CatAreaSqKm']]
+cat_area.head()
# add VPU using lookup table
nut = pd.merge(nut, COMID_VPU, how='left', left_on=['COMID'], right_on=['COMID'])
+nut = pd.merge(nut, cat_area, how='left', left_on=['COMID'], right_on=['COMID'])
nut = nut.drop('Unnamed: 0', axis=1)
# nut = nut.drop('...1', axis=1)
list(nut)

# select columns - this part we can modify to iterate through columns
-farm_fert = nut[['COMID', 'N_Fert_Farm_kg_Cat_Ag_2007', 'CatAreaSqKm', 'VPU']]
-farm_fert = farm_fert.set_axis(['COMID', 'CatSum', 'CatCount', 'VPU'], axis=1)
+final = nut[['COMID', 'SNOW_YrMean', 'CatAreaSqKm', 'VPU']]
+final = final.rename(columns={'SNOW_YrMean': 'CatSum'})
+final['CatCount'] = final['CatAreaSqKm']
+final['CatPctFull'] = 100
+final = final.set_axis(['COMID', 'CatSum', 'CatAreaSqKm','VPU', 'CatCount', 'CatPctFull'], axis=1)

for i in VPU:
    print(i)
-    df = farm_fert[farm_fert['VPU'] == i]
+    df = final[final['VPU'] == i]
    df = df.drop(columns=['VPU'])
-    df.to_csv(nut_dir + '/ByHydroregion/FarmFert_' + str(i) + '.csv',
+    df.to_csv(nut_dir + '/Allocation_and_Accumulation/SNOW_YrMean_' + str(i) + '.csv',
              index=False)
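The loop above filters one hydroregion at a time and writes a CSV per VPU; an equivalent groupby-based sketch, with hypothetical values and the columns built above:

    import pandas as pd

    final = pd.DataFrame({
        "COMID": [101, 102, 103],
        "CatSum": [1.2, 3.4, 5.6],
        "CatAreaSqKm": [0.5, 0.7, 0.9],
        # roughly what the {4: '04', ...} mapping above produces;
        # .astype(str).str.zfill(2) would zero-pad the same way
        "VPU": ["04", "04", "17"],
    })
    final["CatCount"] = final["CatAreaSqKm"]
    final["CatPctFull"] = 100

    nut_dir = "E:/WorkingData/To_Be_Flow_Accumulated/"  # from the script above
    for vpu, df in final.groupby("VPU"):
        df.drop(columns=["VPU"]).to_csv(
            nut_dir + "Allocation_and_Accumulation/SNOW_YrMean_" + str(vpu) + ".csv",
            index=False,
        )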
60 changes: 30 additions & 30 deletions StreamCat.py
@@ -35,7 +35,6 @@


from stream_cat_config import (
-    LOCAL_DIR,
    LYR_DIR,
    MASK_DIR_RP100,
    MASK_DIR_SLP10,
@@ -48,6 +47,7 @@
)
from StreamCat_functions import (
    Accumulation,
+    AdjustCOMs,
    PointInPoly,
    appendConnectors,
    createCatStats,
@@ -73,7 +73,7 @@
# TODO: work out children OR bastards only
makeNumpyVectors(inter_vpu, NHD_DIR)

-INPUTS = np.load(ACCUM_DIR +"vpu_inputs.npy", allow_pickle=True).item()
+INPUTS = np.load(ACCUM_DIR +"/vpu_inputs.npy", allow_pickle=True).item()

already_processed = []

@@ -113,34 +113,34 @@
end="",
flush=True,
)
-for zone, hydroregion in INPUTS.items():
-    if not os.path.exists(f"{OUT_DIR}/{row.FullTableName}_{zone}.csv"):
-        print(zone, end=", ", flush=True)
-        pre = f"{NHD_DIR}/NHDPlus{hydroregion}/NHDPlus{zone}"
-        if not row.accum_type == "Point":
-            izd = (
-                f"{mask_dir}/{zone}.tif"
-                if mask_dir
-                else f"{pre}/NHDPlusCatchment/cat"
-            )
-            cat = createCatStats(
-                row.accum_type,
-                layer,
-                izd,
-                OUT_DIR,
-                zone,
-                row.by_RPU,
-                mask_dir,
-                NHD_DIR,
-                hydroregion,
-                apm,
-            )
-        if row.accum_type == "Point":
-            izd = f"{pre}/NHDPlusCatchment/Catchment.shp"
-            cat = PointInPoly(
-                points, zone, izd, pct_full, mask_dir, apm, summary
-            )
-        cat.to_csv(f"{OUT_DIR}/{row.FullTableName}_{zone}.csv", index=False)
+# for zone, hydroregion in INPUTS.items():
+#     if not os.path.exists(f"{OUT_DIR}/{row.FullTableName}_{zone}.csv"):
+#         print(zone, end=", ", flush=True)
+#         pre = f"{NHD_DIR}/NHDPlus{hydroregion}/NHDPlus{zone}"
+#         if not row.accum_type == "Point":
+#             izd = (
+#                 f"{mask_dir}/{zone}.tif"
+#                 if mask_dir
+#                 else f"{pre}/NHDPlusCatchment/cat"
+#             )
+#             cat = createCatStats(
+#                 row.accum_type,
+#                 layer,
+#                 izd,
+#                 OUT_DIR,
+#                 zone,
+#                 row.by_RPU,
+#                 mask_dir,
+#                 NHD_DIR,
+#                 hydroregion,
+#                 apm,
+#             )
+#         if row.accum_type == "Point":
+#             izd = f"{pre}/NHDPlusCatchment/Catchment.shp"
+#             cat = PointInPoly(
+#                 points, zone, izd, pct_full, mask_dir, apm, summary
+#             )
+#         cat.to_csv(f"{OUT_DIR}/{row.FullTableName}_{zone}.csv", index=False)
print("done!")
print("Accumulating...", end="", flush=True)
for zone in INPUTS:
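Both path fixes in this commit insert a "/" before vpu_inputs.npy, guarding against an ACCUM_DIR config value with no trailing separator; pathlib (or os.path.join) handles either form. A small sketch, assuming ACCUM_DIR is a plain string from stream_cat_config:

    from pathlib import Path

    import numpy as np

    ACCUM_DIR = "accum_npy"  # hypothetical value, with or without trailing slash

    # Path joining normalizes the separator in both cases.
    INPUTS = np.load(Path(ACCUM_DIR) / "vpu_inputs.npy", allow_pickle=True).item()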
2 changes: 1 addition & 1 deletion StreamCatMetrics.csv
@@ -80,7 +80,7 @@ DSID,indicator_category,metric_name,AOI,year,final_table,webtool_name,metric_des
36,Anthropogenic,PctUrbOp[Year][AOI],"Cat, Ws, CatRp100, WsRp100","2001, 2004, 2006, 2008, 2011, 2013, 2016, 2019",NLCD,"Developed, Open Space Land Use Percentage","Percent of AOI classified as developed, open space land use (NLCD class 21)",Percent,E6436B49-3888-476D-8F2E-9415FBCCF850,NLCD,https://www.mrlc.gov/data,6/4/2021,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=E6436B49-3888-476D-8F2E-9415FBCCF850,"Specifc metadata not located, generic StreamCat metadata URL used."
36,Natural,PctWdWet[Year][AOI],"Cat, Ws, CatRp100, WsRp100","2001, 2004, 2006, 2008, 2011, 2013, 2016, 2019",NLCD,Woody Wetland Percentage 2001,Percent of AOI classified as woody wetland land cover (NLCD class 90),Percent,E6436B49-3888-476D-8F2E-9415FBCCF850,NLCD,https://www.mrlc.gov/data,6/4/2021,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=E6436B49-3888-476D-8F2E-9415FBCCF850,"Specifc metadata not located, generic StreamCat metadata URL used."
42,Natural,PctNonAgIntrodManagVeg[AOI],"Cat, Ws, CatRp100, WsRp100",,NonAgIntrodManagVeg,Nonnative Vegetation Landcover,"% Nonagriculture nonnative introduced or managed vegetation landcover type reclassed from LANDFIRE Existing Vegetation Type (EVT), within AOI",Percent,B3B702B9-1A95-4C4D-B296-1365FAA7E8FE,Landfire,https://www.landfire.gov/vegetation.php,1/1/2015,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=B3B702B9-1A95-4C4D-B296-1365FAA7E8FE,
-43,Natural,prG_BMMI[Year],Other,"2008, 2009",NRSA_PredictedBioCondition,NRSA BMMI Good Biological Condition,Predicted probability that a stream segment is in good biologial condition based on a random forest model of the NRSA benthic invertebrate multimetric index (BMMI),Percent,347BAA74-DA58-4F3D-BD51-7A424CAA8EBD,Hill et al. 2017,https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5796808/,9/4/2017,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=347BAA74-DA58-4F3D-BD51-7A424CAA8EBD,"Specifc metadata not located, generic StreamCat metadata URL used."
+43,Natural,prG_BMMI0809,Other,,NRSA_PredictedBioCondition,NRSA BMMI Good Biological Condition,Predicted probability that a stream segment is in good biologial condition based on a random forest model of the NRSA benthic invertebrate multimetric index (BMMI),Percent,347BAA74-DA58-4F3D-BD51-7A424CAA8EBD,Hill et al. 2017,https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5796808/,9/4/2017,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=347BAA74-DA58-4F3D-BD51-7A424CAA8EBD,"Specifc metadata not located, generic StreamCat metadata URL used."
45,Anthropogenic,Pestic97[AOI],"Cat, Ws",1997,Pesticides97,Mean Pesticide Use,Mean pesticide use (kg/km2) in yr. 1997 within AOI.,Percent,FF4DC154-0BEE-4818-8417-331705B40A12,USGS Water Mission Area,http://water.usgs.gov/GIS/metadata/usgswrd/XML/agpest97grd.xml,9/19/2013,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=FF4DC154-0BEE-4818-8417-331705B40A12,"Specifc metadata not located, generic StreamCat metadata URL used."
46,Natural,Precip[Year][AOI],"Cat, Ws","2008, 2009",PRISM_0809,Mean Precipitation,PRISM climate data - Mean precipitation (mm) within the AOI. Period: 2008 & 2009,millimeters,CE7978C6-6F61-4643-9BAB-695FA4478364,PRISM,http://prism.oregonstate.edu,6/8/2015,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=CE7978C6-6F61-4643-9BAB-695FA4478364,
46,Natural,Tmean[Year][AOI],"Cat, Ws","2008, 2009",PRISM_0809,Mean Air Temperature,PRISM climate data - Mean temperature (°C) within the AOI. Period: 2008 & 2009,kilogram/square kilometer,CE7978C6-6F61-4643-9BAB-695FA4478364,PRISM,http://prism.oregonstate.edu,6/8/2015,https://edg.epa.gov/metadata/catalog/search/resource/details.page?uuid=CE7978C6-6F61-4643-9BAB-695FA4478364,"Specifc metadata not located, generic StreamCat metadata URL used."
6 changes: 3 additions & 3 deletions StreamCat_functions.py
@@ -818,12 +818,12 @@ def interVPU(tbl, cols, accum_type, zone, Connector, interVPUtbl):
    if any(interVPUtbl.toCOMIDs.values > 0):
        con = pd.read_csv(Connector).set_index("COMID")
        con.columns = map(str, con.columns)
-        toVPUs = toVPUs.append(con)
+        toVPUs = pd.concat([toVPUs,con], axis=0, ignore_index=False)
        toVPUs.to_csv(Connector)
    if os.path.exists(Connector): # if Connector already exists, read it in and append
        con = pd.read_csv(Connector).set_index("COMID")
        con.columns = map(str, con.columns)
-        throughVPUs = throughVPUs.append(con)
+        throughVPUs = pd.concat([throughVPUs, con], axis=0, ignore_index=False)
        throughVPUs.to_csv(Connector)


@@ -1228,7 +1228,7 @@ def appendConnectors(cat, Connector, zone, interVPUtbl):
)
]

-    cat = cat.append(con)
+    cat = pd.concat([cat, con], axis=0, ignore_index=False)
    return cat.reset_index(drop=True)


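In interVPU above, each frame is indexed by COMID before concatenation; with ignore_index=False (the default) the combined frame keeps those COMID labels, so the following to_csv(Connector) writes COMID back out as the index column. A minimal sketch with made-up values:

    import pandas as pd

    through = pd.DataFrame({"val": [1.0]}, index=pd.Index([100], name="COMID"))
    con = pd.DataFrame({"val": [2.0]}, index=pd.Index([200], name="COMID"))

    # The COMID index survives the concat, so round-tripping via
    # read_csv(...).set_index("COMID") keeps accumulating rows.
    merged = pd.concat([through, con], axis=0, ignore_index=False)
    merged.to_csv("connector.csv")  # first column is COMID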
