diff --git a/urbansim/models/dcm.py b/urbansim/models/dcm.py index 50c63371..6cce4f76 100644 --- a/urbansim/models/dcm.py +++ b/urbansim/models/dcm.py @@ -234,13 +234,15 @@ class MNLDiscreteChoiceModel(DiscreteChoiceModel): Whether (and how much) to sample alternatives during prediction. Note that this can lead to multiple choosers picking the same alternative. - choice_column : optional + choice_column : str, optional Name of the column in the `alternatives` table that choosers should choose. e.g. the 'building_id' column. If not provided the alternatives index is used. - name : optional + name : str, optional Optional descriptive name for this model that may be used in output. + normalize : bool, optional + Whether to subtract the mean of and divide by the standard deviation of each column before fitting the coefficients. Default False. """ def __init__( @@ -251,7 +253,8 @@ def __init__( interaction_predict_filters=None, estimation_sample_size=None, prediction_sample_size=None, - choice_column=None, name=None): + choice_column=None, name=None, + normalize=False): self._check_prob_choice_mode_compat(probability_mode, choice_mode) self._check_prob_mode_interaction_compat( probability_mode, interaction_predict_filters) @@ -270,6 +273,7 @@ def __init__( self.choice_column = choice_column self.name = name if name is not None else 'MNLDiscreteChoiceModel' self.sim_pdf = None + self.normalize = normalize self.log_likelihoods = None self.fit_parameters = None @@ -308,7 +312,8 @@ def from_yaml(cls, yaml_str=None, str_or_buffer=None): estimation_sample_size=cfg.get('estimation_sample_size', None), prediction_sample_size=cfg.get('prediction_sample_size', None), choice_column=cfg.get('choice_column', None), - name=cfg.get('name', None) + name=cfg.get('name', None), + normalize=cfg.get('normalize', False), ) if cfg.get('log_likelihoods', None): @@ -420,7 +425,7 @@ def fit(self, choosers, alternatives, current_choice): 'the input columns.') self.log_likelihoods, self.fit_parameters = mnl.mnl_estimate( - 
model_design.as_matrix(), chosen, self.sample_size) + model_design.as_matrix(), chosen, self.sample_size, normalize=self.normalize) self.fit_parameters.index = model_design.columns logger.debug('finish: fit LCM model {}'.format(self.name)) @@ -534,10 +539,18 @@ def probabilities(self, choosers, alternatives, filter_tables=True): coeffs = [self.fit_parameters['Coefficient'][x] for x in model_design.columns] - normalization_mean = [self.fit_parameters['Normalization Mean'][x] - for x in model_design.columns] - normalization_std = [self.fit_parameters['Normalization Std'][x] - for x in model_design.columns] + if 'Normalization Mean' in self.fit_parameters: + fitted_means = self.fit_parameters['Normalization Mean'] + normalization_mean = [fitted_means[x] + for x in model_design.columns] + else: + normalization_mean = 0.0 + if 'Normalization Std' in self.fit_parameters: + fitted_stds = self.fit_parameters['Normalization Std'] + normalization_std = [fitted_stds[x] + for x in model_design.columns] + else: + normalization_std = 1.0 # probabilities are returned from mnl_simulate as a 2d array # with choosers along rows and alternatives along columns @@ -549,9 +562,9 @@ def probabilities(self, choosers, alternatives, filter_tables=True): probabilities = mnl.mnl_simulate( model_design.as_matrix(), coeffs, + numalts, normalization_mean, normalization_std, - numalts=numalts, returnprobs=True) # want to turn probabilities into a Series with a MultiIndex @@ -688,7 +701,8 @@ def to_dict(self): 'fitted': self.fitted, 'log_likelihoods': self.log_likelihoods, 'fit_parameters': (yamlio.frame_to_yaml_safe(self.fit_parameters) - if self.fitted else None) + if self.fitted else None), + 'normalize': self.normalize, } def to_yaml(self, str_or_buffer=None): diff --git a/urbansim/urbanchoice/mnl.py b/urbansim/urbanchoice/mnl.py index 1a43f21f..f32d4d4d 100644 --- a/urbansim/urbanchoice/mnl.py +++ b/urbansim/urbanchoice/mnl.py @@ -118,7 +118,7 @@ def mnl_loglik(beta, data, chosen, numalts, weights=None, lcgrad=False, return -1 
* loglik, -1 * gradarr -def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GPU=False, returnprobs=True): +def mnl_simulate(data, coeff, numalts, normalization_mean=0.0, normalization_std=1.0, GPU=False, returnprobs=True): """ Get the probabilities for each chooser choosing between `numalts` alternatives. @@ -131,12 +131,12 @@ def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GP choosers. Alternatives must be in the same order for each chooser. coeff : 1D array The model coefficients corresponding to each column in `data`. - normalization_mean : 1D array - The model normalization constant corresponding to each column in `data`. - normalization_std : 1D array - The model normalization factor corresponding to each column in `data`. numalts : int The number of alternatives available to each chooser. + normalization_mean : float or 1D array, optional + The mean(s) subtracted from each column in `data` before applying the coefficients. Default 0.0 (no shift). + normalization_std : float or 1D array, optional + The standard deviation(s) each column in `data` is divided by before applying the coefficients. Default 1.0 (no scaling). GPU : bool, optional returnprobs : bool, optional If True, return the probabilities for each chooser/alternative instead @@ -178,7 +178,7 @@ def mnl_simulate(data, coeff, normalization_mean, normalization_std, numalts, GP def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3), - weights=None, lcgrad=False, beta=None): + weights=None, lcgrad=False, beta=None, normalize=False): """ Calculate coefficients of the MNL model. @@ -202,6 +202,8 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3), lcgrad : bool, optional beta : 1D array, optional Any initial guess for the coefficients. 
+ normalize : bool, optional + Whether to subtract the mean of and divide by the standard deviation of each column before fitting the coefficients. Default False. Returns ------- @@ -226,10 +228,11 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3), numvars = data.shape[1] numobs = data.shape[0] // numalts - normalization_mean = data.mean(0) - normalization_std = data.std(0, ddof=1) + if normalize: + normalization_mean = data.mean(0) + normalization_std = data.std(0, ddof=1) - data = (data.copy() - normalization_mean) / normalization_std + data = (data.copy() - normalization_mean) / normalization_std if chosen is None: chosen = np.ones((numobs, numalts)) # used for latent classes @@ -270,11 +273,13 @@ def mnl_estimate(data, chosen, numalts, GPU=False, coeffrange=(-3, 3), } fit_parameters = pd.DataFrame({ - 'Normalization Mean': normalization_mean, - 'Normalization Std': normalization_std, 'Coefficient': beta, 'Std. Error': stderr, 'T-Score': beta / stderr}) + if normalize: + fit_parameters['Normalization Mean'] = normalization_mean + fit_parameters['Normalization Std'] = normalization_std + logger.debug('finish: MNL fit') return log_likelihood, fit_parameters