From cbe63103d039ed82c45f76bc3c4f0059cbb0b303 Mon Sep 17 00:00:00 2001 From: Sijia Wang Date: Thu, 7 Dec 2023 17:32:39 -0500 Subject: [PATCH] make int downcast an option --- activitysim/core/assign.py | 4 +++- activitysim/core/util.py | 10 +++++++--- .../extensions/stop_frequency_university_parking.py | 5 +++++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/activitysim/core/assign.py b/activitysim/core/assign.py index 57cb871e0..40864a3e6 100644 --- a/activitysim/core/assign.py +++ b/activitysim/core/assign.py @@ -424,6 +424,8 @@ def rng_lognormal(random_draws, mu, sigma, broadcast=True, scale=False): # we stored result in dict - convert to df variables = util.df_from_dict(variables, index=df.index) - util.auto_opt_pd_dtypes(variables, inplace=True) + util.auto_opt_pd_dtypes( + variables, downcast_int=False, downcast_float=False, inplace=True + ) return variables, trace_results, trace_assigned_locals diff --git a/activitysim/core/util.py b/activitysim/core/util.py index 8c20beb35..ee00b219c 100644 --- a/activitysim/core/util.py +++ b/activitysim/core/util.py @@ -316,7 +316,7 @@ def quick_loc_series(loc_list, target_series): return df.right -def assign_in_place(df, df2, downcast_float=False): +def assign_in_place(df, df2, downcast_int=False, downcast_float=False): """ update existing row values in df from df2, adding columns to df if they are not there @@ -383,11 +383,11 @@ def assign_in_place(df, df2, downcast_float=False): if pd.api.types.is_object_dtype(df[c]): df[c] = df[c].astype("category") - auto_opt_pd_dtypes(df, downcast_float, inplace=True) + auto_opt_pd_dtypes(df, downcast_int, downcast_float, inplace=True) def auto_opt_pd_dtypes( - df_: pd.DataFrame, downcast_float=False, inplace=False + df_: pd.DataFrame, downcast_int=False, downcast_float=False, inplace=False ) -> Optional[pd.DataFrame]: """ Automatically downcast Number dtypes for minimal possible, @@ -397,6 +397,8 @@ def auto_opt_pd_dtypes( ---------- df_ : pd.DataFrame assignment left-hand-side (dest) + downcast_int: bool + if True, downcast int columns if possible downcast_float: bool if True, downcast float columns if possible inplace: bool @@ -429,6 +431,8 @@ def auto_opt_pd_dtypes( continue # Handle integer types if pd.api.types.is_integer_dtype(dtype): + if not downcast_int: + continue # there is a bug in pandas to_numeric # when convert int and floats gt 16777216 # https://github.com/pandas-dev/pandas/issues/43693 diff --git a/activitysim/examples/production_semcog/extensions/stop_frequency_university_parking.py b/activitysim/examples/production_semcog/extensions/stop_frequency_university_parking.py index 7264fed35..10573ba98 100644 --- a/activitysim/examples/production_semcog/extensions/stop_frequency_university_parking.py +++ b/activitysim/examples/production_semcog/extensions/stop_frequency_university_parking.py @@ -124,6 +124,11 @@ def stop_frequency_university_parking( park_trips = park_to_campus | park_from_campus + # check if parking_name is in the purpose category + if not parking_name in trip_choosers.purpose.cat.categories: + trip_choosers.purpose = trip_choosers.purpose.cat.add_categories( + [parking_name] + ) trip_choosers.loc[park_trips, "purpose"] = parking_name trip_choosers.loc[park_trips, "destination_logsum"] = pd.NA trip_choosers.loc[park_trips, "destination"] = trip_choosers.loc[