From f153a43a4cc57b5fd976226fe044be729d2b9b6f Mon Sep 17 00:00:00 2001 From: mxndrwgrdnr Date: Sat, 4 Sep 2021 02:15:39 +0000 Subject: [PATCH 1/7] read mct_ops settings from yaml and perform ops --- .../models/large_multinomial_logit.py | 368 +++++++++++------- 1 file changed, 217 insertions(+), 151 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index fab92ce..1a73f39 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -3,21 +3,21 @@ import orca from urbansim.models.util import columns_in_formula, apply_filter_query from choicemodels.tools import MergedChoiceTable +import pandas as pd from .. import modelmanager from ..utils import get_data, update_column, to_list, version_greater_or_equal from .shared import TemplateStep - def check_choicemodels_version(): try: import choicemodels assert version_greater_or_equal(choicemodels.__version__, '0.2.dev4') except: raise ImportError("LargeMultinomialLogitStep requires choicemodels 0.2.dev4 or " - "later. For installation instructions, see " - "https://github.com/udst/choicemodels.") + "later. For installation instructions, see " + "https://github.com/udst/choicemodels.") @modelmanager.template @@ -26,9 +26,9 @@ class LargeMultinomialLogitStep(TemplateStep): Class for building standard multinomial logit model steps where alternatives are interchangeable and all have the same model expression. Supports random sampling of alternatives. - + Estimation and simulation are performed using ChoiceModels. - + Parameters ---------- choosers : str or list of str, optional @@ -39,7 +39,7 @@ class LargeMultinomialLogitStep(TemplateStep): 'alternatives' parameters replace the 'tables' parameter. Both are required for fitting a model, but do not have to be provided when the object is created. Reserved column names: 'chosen'. - + alternatives : str or list of str, optional Name(s) of Orca tables containing data about alternatives. The first table is the primary one. Any additional tables need to have merge relationships ("broadcasts") @@ -48,19 +48,19 @@ class LargeMultinomialLogitStep(TemplateStep): 'alternatives' parameters replace the 'tables' parameter. Both are required for fitting a model, but do not have to be provided when the object is created. Reserved column names: 'chosen'. - + model_expression : str, optional Patsy-style right-hand-side model expression representing the utility of a single alternative. Passed to `choicemodels.MultinomialLogit()`. This parameter is required for fitting a model, but does not have to be provided when the object is created. - + choice_column : str, optional Name of the column indicating observed choices, for model estimation. The column should contain integers matching the id of the primary `alternatives` table. This parameter is required for fitting a model, but it does not have to be provided when the object is created. Not required for simulation. - + chooser_filters : str or list of str, optional Filters to apply to the chooser data before fitting the model. These are passed to `pd.DataFrame.query()`. Filters are applied after any additional tables are merged @@ -101,7 +101,7 @@ class LargeMultinomialLogitStep(TemplateStep): to match its data type. If the column is generated on the fly, it will be given the same data type as the index of the alternatives table. Replaces the `out_fname` argument in UrbanSim. 
- + out_chooser_filters : str or list of str, optional Filters to apply to the chooser data before simulation. If not provided, no filters will be applied. Replaces the `predict_filters` argument in UrbanSim. @@ -113,69 +113,70 @@ class LargeMultinomialLogitStep(TemplateStep): constrained_choices : bool, optional "True" means alternatives have limited capacity. "False" (default) means that alternatives can accommodate an unlimited number of choosers. - + alt_capacity : str, optional Name of a column in the out_alternatives table that expresses the capacity of alternatives. If not provided and constrained_choices is True, each alternative is interpreted as accommodating a single chooser. - + chooser_size : str, optional Name of a column in the out_choosers table that expresses the size of choosers. Choosers might have varying sizes if the alternative capacities are amounts rather than counts -- e.g. square footage. Chooser sizes must be in the same units as alternative capacities. If not provided and constrained_choices is True, each chooser has a size of 1. - + max_iter : int or None, optional Maximum number of choice simulation iterations. If None (default), the algorithm will iterate until all choosers are matched or no alternatives remain. - + name : str, optional Name of the model step, passed to ModelManager. If none is provided, a name is generated each time the `fit()` method runs. - + tags : list of str, optional Tags, passed to ModelManager. - + Attributes ---------- All parameters can also be get and set as properties. The following attributes should be treated as read-only. - + choices : pd.Series Available after the model step is run. List of chosen alternative id's, indexed with the chooser id. Does not persist when the model step is reloaded from storage. - + mergedchoicetable : choicemodels.tools.MergedChoiceTable Table built for estimation or simulation. Does not persist when the model step is reloaded from storage. Not available if choices have capacity constraints, because multiple choice tables are generated iteratively. - + model : choicemodels.MultinomialLogitResults Available after a model has been fit. Persists when reloaded from storage. - + probabilities : pd.Series Available after the model step is run -- but not if choices have capacity constraints, which requires probabilities to be calculated multiple times. Provides list of probabilities corresponding to the sampled alternatives, indexed with the chooser and alternative id's. Does not persist when the model step is reloaded from storage. 
- + """ - def __init__(self, choosers=None, alternatives=None, model_expression=None, - choice_column=None, chooser_filters=None, chooser_sample_size=None, - alt_filters=None, alt_sample_size=None, out_choosers=None, - out_alternatives=None, out_column=None, out_chooser_filters=None, - out_alt_filters=None, constrained_choices=False, alt_capacity=None, - chooser_size=None, max_iter=None, name=None, tags=[]): - + + def __init__(self, choosers=None, alternatives=None, model_expression=None, + choice_column=None, chooser_filters=None, chooser_sample_size=None, + alt_filters=None, alt_sample_size=None, out_choosers=None, + out_alternatives=None, out_column=None, out_chooser_filters=None, + out_alt_filters=None, constrained_choices=False, alt_capacity=None, + chooser_size=None, max_iter=None, name=None, tags=[]): + self._listeners = [] - + # Parent class can initialize the standard parameters - TemplateStep.__init__(self, tables=None, model_expression=model_expression, - filters=None, out_tables=None, out_column=out_column, out_transform=None, - out_filters=None, name=name, tags=tags) + TemplateStep.__init__(self, tables=None, model_expression=model_expression, + filters=None, out_tables=None, out_column=out_column, out_transform=None, + out_filters=None, name=name, tags=tags) # Custom parameters not in parent class self.choosers = choosers @@ -193,76 +194,72 @@ def __init__(self, choosers=None, alternatives=None, model_expression=None, self.alt_capacity = alt_capacity self.chooser_size = chooser_size self.max_iter = max_iter - + # Placeholders for model fit data, filled in by fit() or from_dict() - self.summary_table = None + self.summary_table = None self.fitted_parameters = None self.model = None - + # Placeholders for diagnostic data, filled in by fit() or run() self.mergedchoicetable = None self.probabilities = None self.choices = None - def bind_to(self, callback): self._listeners.append(callback) - - + def send_to_listeners(self, param, value): for callback in self._listeners: callback(param, value) - - + @classmethod def from_dict(cls, d): """ Create an object instance from a saved dictionary representation. 
- + Parameters ---------- d : dict - + Returns ------- LargeMultinomialLogitStep - + """ check_choicemodels_version() from choicemodels import MultinomialLogitResults - + # Pass values from the dictionary to the __init__() method - obj = cls(choosers=d['choosers'], alternatives=d['alternatives'], - model_expression=d['model_expression'], choice_column=d['choice_column'], - chooser_filters=d['chooser_filters'], - chooser_sample_size=d['chooser_sample_size'], - alt_filters=d['alt_filters'], alt_sample_size=d['alt_sample_size'], - out_choosers=d['out_choosers'], out_alternatives=d['out_alternatives'], - out_column=d['out_column'], out_chooser_filters=d['out_chooser_filters'], - out_alt_filters=d['out_alt_filters'], - constrained_choices=d['constrained_choices'], alt_capacity=d['alt_capacity'], - chooser_size=d['chooser_size'], max_iter=d['max_iter'], name=d['name'], - tags=d['tags']) + obj = cls(choosers=d['choosers'], alternatives=d['alternatives'], + model_expression=d['model_expression'], choice_column=d['choice_column'], + chooser_filters=d['chooser_filters'], + chooser_sample_size=d['chooser_sample_size'], + alt_filters=d['alt_filters'], alt_sample_size=d['alt_sample_size'], + out_choosers=d['out_choosers'], out_alternatives=d['out_alternatives'], + out_column=d['out_column'], out_chooser_filters=d['out_chooser_filters'], + out_alt_filters=d['out_alt_filters'], + constrained_choices=d['constrained_choices'], alt_capacity=d['alt_capacity'], + chooser_size=d['chooser_size'], max_iter=d['max_iter'], name=d['name'], + tags=d['tags']) # Load model fit data obj.summary_table = d['summary_table'] obj.fitted_parameters = d['fitted_parameters'] - + if obj.fitted_parameters is not None: - obj.model = MultinomialLogitResults(model_expression = obj.model_expression, - fitted_parameters = obj.fitted_parameters) - - return obj + obj.model = MultinomialLogitResults(model_expression=obj.model_expression, + fitted_parameters=obj.fitted_parameters) + return obj def to_dict(self): """ Create a dictionary representation of the object. 
- + Returns ------- dict - + """ d = { 'template': self.template, @@ -291,60 +288,66 @@ def to_dict(self): } return d - # TO DO - there has got to be a less verbose way to handle getting and setting - + @property def choosers(self): return self.__choosers + @choosers.setter def choosers(self, value): self.__choosers = self._normalize_table_param(value) self.send_to_listeners('choosers', value) - + @property def alternatives(self): return self.__alternatives + @alternatives.setter def alternatives(self, value): self.__alternatives = self._normalize_table_param(value) self.send_to_listeners('alternatives', value) - + @property def model_expression(self): return self.__model_expression + @model_expression.setter def model_expression(self, value): self.__model_expression = value self.send_to_listeners('model_expression', value) - + @property def choice_column(self): return self.__choice_column + @choice_column.setter def choice_column(self, value): self.__choice_column = value self.send_to_listeners('choice_column', value) - + @property def chooser_filters(self): return self.__chooser_filters + @chooser_filters.setter def chooser_filters(self, value): self.__chooser_filters = value self.send_to_listeners('chooser_filters', value) - + @property def chooser_sample_size(self): return self.__chooser_sample_size + @chooser_sample_size.setter def chooser_sample_size(self, value): self.__chooser_sample_size = value self.send_to_listeners('chooser_sample_size', value) - + @property def alt_filters(self): return self.__alt_filters + @alt_filters.setter def alt_filters(self, value): self.__alt_filters = value @@ -353,6 +356,7 @@ def alt_filters(self, value): @property def alt_sample_size(self): return self.__alt_sample_size + @alt_sample_size.setter def alt_sample_size(self, value): self.__alt_sample_size = value @@ -361,104 +365,112 @@ def alt_sample_size(self, value): @property def out_choosers(self): return self.__out_choosers + @out_choosers.setter def out_choosers(self, value): self.__out_choosers = self._normalize_table_param(value) self.send_to_listeners('out_choosers', value) - + @property def out_alternatives(self): return self.__out_alternatives + @out_alternatives.setter def out_alternatives(self, value): - self.__out_alternatives = self._normalize_table_param(value) + self.__out_alternatives = self._normalize_table_param(value) self.send_to_listeners('out_alternatives', value) @property def out_column(self): return self.__out_column + @out_column.setter def out_column(self, value): self.__out_column = value self.send_to_listeners('out_column', value) - + @property def out_chooser_filters(self): return self.__out_chooser_filters + @out_chooser_filters.setter def out_chooser_filters(self, value): self.__out_chooser_filters = value self.send_to_listeners('out_chooser_filters', value) - + @property def out_alt_filters(self): return self.__out_alt_filters + @out_alt_filters.setter def out_alt_filters(self, value): self.__out_alt_filters = value self.send_to_listeners('out_alt_filters', value) - + @property def constrained_choices(self): return self.__constrained_choices + @constrained_choices.setter def constrained_choices(self, value): self.__constrained_choices = value self.send_to_listeners('constrained_choices', value) - + @property def alt_capacity(self): return self.__alt_capacity + @alt_capacity.setter def alt_capacity(self, value): self.__alt_capacity = value self.send_to_listeners('alt_capacity', value) - + @property def chooser_size(self): return self.__chooser_size + 
@chooser_size.setter def chooser_size(self, value): self.__chooser_size = value self.send_to_listeners('chooser_size', value) - + @property def max_iter(self): return self.__max_iter + @max_iter.setter def max_iter(self, value): self.__max_iter = value self.send_to_listeners('max_iter', value) - - + def fit(self, mct=None): """ Fit the model; save and report results. This uses the ChoiceModels estimation engine (originally from UrbanSim MNL). - + The `fit()` method can be run as many times as desired. Results will not be saved with Orca or ModelManager until the `register()` method is run. - + After sampling alternatives for each chooser, the merged choice table is saved to the class object for diagnostic use (`mergedchoicetable` with type choicemodels.tools.MergedChoiceTable). - + Parameters ---------- mct : choicemodels.tools.MergedChoiceTable This parameter is a temporary backdoor allowing us to pass in a more complicated choice table than can be generated within the template, for example including sampling weights or interaction terms. - + Returns ------- None - + """ check_choicemodels_version() from choicemodels import MultinomialLogit from choicemodels.tools import MergedChoiceTable - + if (mct is not None): df_from_mct = mct.to_frame() idx_names = df_from_mct.index.names @@ -467,44 +479,43 @@ def fit(self, mct=None): df_from_mct, self.chooser_filters).set_index(idx_names) mct = MergedChoiceTable.from_df(df_from_mct) - else: - observations = get_data(tables = self.choosers, - filters = self.chooser_filters, - model_expression = self.model_expression, - extra_columns = self.choice_column) - + else: + observations = get_data(tables=self.choosers, + filters=self.chooser_filters, + model_expression=self.model_expression, + extra_columns=self.choice_column) + if (self.chooser_sample_size is not None): observations = observations.sample(self.chooser_sample_size) - - alternatives = get_data(tables = self.alternatives, - filters = self.alt_filters, - model_expression = self.model_expression) - - mct = MergedChoiceTable(observations = observations, - alternatives = alternatives, - chosen_alternatives = self.choice_column, - sample_size = self.alt_sample_size) - - model = MultinomialLogit(data = mct, - model_expression = self.model_expression) + + alternatives = get_data(tables=self.alternatives, + filters=self.alt_filters, + model_expression=self.model_expression) + + mct = MergedChoiceTable(observations=observations, + alternatives=alternatives, + chosen_alternatives=self.choice_column, + sample_size=self.alt_sample_size) + + model = MultinomialLogit(data=mct, + model_expression=self.model_expression) results = model.fit() - + self.name = self._generate_name() self.summary_table = str(results) print(self.summary_table) - + coefs = results.get_raw_results()['fit_parameters']['Coefficient'] self.fitted_parameters = coefs.tolist() self.model = results - + # Save merged choice table to the class object for diagnostics self.mergedchoicetable = mct - - + def run(self, chooser_batch_size=None, interaction_terms=None): """ Run the model step: simulate choices and use them to update an Orca column. - + The simulated choices are saved to the class object for diagnostics. If choices are unconstrained, the choice table and the probabilities of sampled alternatives are saved as well. @@ -526,90 +537,145 @@ def run(self, chooser_batch_size=None, interaction_terms=None): MultiIndex. 
One level's name and values should match an index or column from the observations table, and the other should match an index or column from the alternatives table. - + Returns ------- None - + """ check_choicemodels_version() from choicemodels import MultinomialLogit - from choicemodels.tools import (MergedChoiceTable, monte_carlo_choices, - iterative_lottery_choices) + from choicemodels.tools import (MergedChoiceTable, monte_carlo_choices, + iterative_lottery_choices) # Clear simulation attributes from the class object self.mergedchoicetable = None self.probabilities = None self.choices = None - + if interaction_terms is not None: - uniq_intx_idx_names = set([idx for intx in interaction_terms for idx in intx.index.names]) - obs_extra_cols = to_list(self.chooser_size) + list(uniq_intx_idx_names) - alts_extra_cols = to_list(self.alt_capacity) + list(uniq_intx_idx_names) + uniq_intx_idx_names = set( + [idx for intx in interaction_terms for idx in intx.index.names]) + obs_extra_cols = to_list(self.chooser_size) + \ + list(uniq_intx_idx_names) + alts_extra_cols = to_list( + self.alt_capacity) + list(uniq_intx_idx_names) else: obs_extra_cols = self.chooser_size alts_extra_cols = self.alt_capacity - observations = get_data(tables = self.out_choosers, - fallback_tables = self.choosers, - filters = self.out_chooser_filters, - model_expression = self.model_expression, - extra_columns = obs_extra_cols) - + observations = get_data(tables=self.out_choosers, + fallback_tables=self.choosers, + filters=self.out_chooser_filters, + model_expression=self.model_expression, + extra_columns=obs_extra_cols) + if len(observations) == 0: print("No valid choosers") return - - alternatives = get_data(tables = self.out_alternatives, - fallback_tables = self.alternatives, - filters = self.out_alt_filters, - model_expression = self.model_expression, - extra_columns = alts_extra_cols) - + + alternatives = get_data(tables=self.out_alternatives, + fallback_tables=self.alternatives, + filters=self.out_alt_filters, + model_expression=self.model_expression, + extra_columns=alts_extra_cols) + if len(alternatives) == 0: print("No valid alternatives") return - + # Remove filter columns before merging, in case column names overlap expr_cols = columns_in_formula(self.model_expression) - - obs_cols = set(observations.columns) & set(expr_cols + to_list(obs_extra_cols)) + + obs_cols = set(observations.columns) & set( + expr_cols + to_list(obs_extra_cols)) observations = observations[list(obs_cols)] - - alt_cols = set(alternatives.columns) & set(expr_cols + to_list(alts_extra_cols)) + + alt_cols = set(alternatives.columns) & set( + expr_cols + to_list(alts_extra_cols)) alternatives = alternatives[list(alt_cols)] - + # Callables for iterative choices - def mct(obs, alts): - return MergedChoiceTable( + def mct(obs, alts, mct_intx_ops=None): + + this_mct = MergedChoiceTable( obs, alts, sample_size=self.alt_sample_size, interaction_terms=interaction_terms) + if mct_intx_ops: + mct_df = this_mct.to_frame() + og_mct_index = mct_df.index.names + mct_df.reset_index(inplace=True) + mct_df.index.name = 'mct_index' + + # merges + intx_df = mct_df.copy() + for merge in mct_intx_ops['successive_merges']: + left = intx_df[merge.get('mct_cols', intx_df.columns)] + right = get_data( + merge['right_table'], + extra_columns=merge.get('right_cols', None)) + intx_df = pd.merge( + left, right, + how=merge.get('how', 'inner'), + on=merge.get('on_cols', None), + left_on=merge.get('left_on', None), + right_on=merge.get('right_on', None), + 
left_index=merge.get('left_index', False), + right_index=merge.get('right_index', False), + suffixes=merge.get('suffixes', ('_x', '_y'))) + + # aggs + aggs = mct_intx_ops.get('aggregations', False) + if aggs: + intx_df = intx_df.groupby('mct_index').agg(aggs) + + # rename cols + if mct_intx_ops.get('rename_cols', False): + intx_df = intx_df.rename( + columns=mct_intx_ops['rename_cols']) + + # update mct + mct_df = pd.merge(mct_df, intx_df, on='mct_index') + + # create new cols from expressions + for new_col, expr in mct_intx_ops.get('eval_ops', {}): + mct_df[new_col] = mct_df.eval(expr) + + mct_df.set_index(og_mct_index, inplace=True) + if mct_df.isna.any(): + print("Replacing Nones and NaNs with 0") + mct_df = mct_df.fillna(0) + this_mct = MergedChoiceTable.from_df(mct_df) + + return this_mct + def probs(mct): return self.model.probabilities(mct) if (self.constrained_choices == True): - choices = iterative_lottery_choices(observations, alternatives, - mct_callable=mct, probs_callable=probs, - alt_capacity=self.alt_capacity, chooser_size=self.chooser_size, - max_iter=self.max_iter, chooser_batch_size=chooser_batch_size) - + choices = iterative_lottery_choices( + observations, alternatives, + mct_callable=mct, probs_callable=probs, + alt_capacity=self.alt_capacity, chooser_size=self.chooser_size, + max_iter=self.max_iter, chooser_batch_size=chooser_batch_size) + else: choicetable = mct(observations, alternatives) probabilities = probs(choicetable) choices = monte_carlo_choices(probabilities) - + # Save data to class object if available self.mergedchoicetable = choicetable self.probabilities = probabilities - + # Save choices to class object for diagnostics self.choices = choices # Update Orca - update_column(table = self.out_choosers, - fallback_table = self.choosers, - column = self.out_column, - fallback_column = self.choice_column, - data = choices) + update_column(table=self.out_choosers, + fallback_table=self.choosers, + column=self.out_column, + fallback_column=self.choice_column, + data=choices) From ef9a5782a17d8fcd1e4678858d63ca9fc13bb4dc Mon Sep 17 00:00:00 2001 From: mxndrwgrdnr Date: Tue, 7 Sep 2021 19:48:56 +0000 Subject: [PATCH 2/7] read mct intx ops from yaml --- .../models/large_multinomial_logit.py | 153 ++++++++++++------ 1 file changed, 101 insertions(+), 52 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index 1a73f39..b27c277 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -169,7 +169,7 @@ def __init__(self, choosers=None, alternatives=None, model_expression=None, alt_filters=None, alt_sample_size=None, out_choosers=None, out_alternatives=None, out_column=None, out_chooser_filters=None, out_alt_filters=None, constrained_choices=False, alt_capacity=None, - chooser_size=None, max_iter=None, name=None, tags=[]): + chooser_size=None, max_iter=None, mct_intx_ops=None, name=None, tags=[]): self._listeners = [] @@ -194,6 +194,7 @@ def __init__(self, choosers=None, alternatives=None, model_expression=None, self.alt_capacity = alt_capacity self.chooser_size = chooser_size self.max_iter = max_iter + self.mct_intx_ops = mct_intx_ops # Placeholders for model fit data, filled in by fit() or from_dict() self.summary_table = None @@ -205,6 +206,8 @@ def __init__(self, choosers=None, alternatives=None, model_expression=None, self.probabilities = None self.choices = None + + def bind_to(self, callback): 
self._listeners.append(callback) @@ -239,7 +242,8 @@ def from_dict(cls, d): out_column=d['out_column'], out_chooser_filters=d['out_chooser_filters'], out_alt_filters=d['out_alt_filters'], constrained_choices=d['constrained_choices'], alt_capacity=d['alt_capacity'], - chooser_size=d['chooser_size'], max_iter=d['max_iter'], name=d['name'], + chooser_size=d['chooser_size'], max_iter=d['max_iter'], + mct_intx_ops=d['mct_intx_ops'], name=d['name'], tags=d['tags']) # Load model fit data @@ -283,6 +287,7 @@ def to_dict(self): 'alt_capacity': self.alt_capacity, 'chooser_size': self.chooser_size, 'max_iter': self.max_iter, + 'mct_intx_ops': self.mct_intx_ops, 'summary_table': self.summary_table, 'fitted_parameters': self.fitted_parameters, } @@ -443,6 +448,90 @@ def max_iter(self, value): self.__max_iter = value self.send_to_listeners('max_iter', value) + @property + def mct_intx_ops(self): + return self.__mct_intx_ops + + @mct_intx_ops.setter + def mct_intx_ops(self, value): + self.__mct_intx_ops = value + self.send_to_listeners('mct_intx_ops', value) + + def perform_mct_intx_ops(self, mct, nan_handling='zero'): + """ + Method to dynamically update a MergedChoiceTable object according to + a pre-defined set of operations specified in the model .yaml config. + Operations are performed sequentially as follows: 1) Pandas merges + with other Orca tables; 2) Pandas group-by aggregations; 3) rename + existing columns; 4) create new columns via Pandas `eval()`. + + Parameters + ---------- + mct : choicemodels.tools.MergedChoiceTable + nan_handling : str + Either 'zero' or 'drop', where the former will replace all NaN's + and None's with 0 integers and the latter will drop all rows with + any NaN or Null values. + + Returns + ------- + MergedChoiceTable + """ + + intx_ops = self.mct_intx_ops + mct_df = mct.to_frame() + og_mct_index = mct_df.index.names + mct_df.reset_index(inplace=True) + mct_df.index.name = 'mct_index' + + # merges + intx_df = mct_df.copy() + for merge in intx_ops['successive_merges']: + left = intx_df[merge.get('mct_cols', intx_df.columns)] + right = get_data( + merge['right_table'], + extra_columns=merge.get('right_cols', None)) + intx_df = pd.merge( + left, right, + how=merge.get('how', 'inner'), + on=merge.get('on_cols', None), + left_on=merge.get('left_on', None), + right_on=merge.get('right_on', None), + left_index=merge.get('left_index', False), + right_index=merge.get('right_index', False), + suffixes=merge.get('suffixes', ('_x', '_y'))) + + # aggs + aggs = intx_ops.get('aggregations', False) + if aggs: + intx_df = intx_df.groupby('mct_index').agg(aggs) + + # rename cols + if intx_ops.get('rename_cols', False): + intx_df = intx_df.rename( + columns=intx_ops['rename_cols']) + + # update mct + mct_df = pd.merge(mct_df, intx_df, on='mct_index') + + # create new cols from expressions + for new_col, expr in intx_ops.get('eval_ops', {}): + mct_df[new_col] = mct_df.eval(expr) + + # restore original mct index + mct_df.set_index(og_mct_index, inplace=True) + + # handle NaNs and Nones + if mct_df.isna.any(): + if nan_handling == 'zero': + print("Replacing MCT None's and NaN's with 0") + mct_df = mct_df.fillna(0) + elif nan_handling == 'drop': + print("Dropping rows with None's/NaN's from MCT") + mct_df = mct_df.dropna(axis=0) + + return MergedChoiceTable.from_df(mct_df) + def fit(self, mct=None): """ Fit the model; save and report results. 
This uses the ChoiceModels estimation @@ -597,72 +686,32 @@ def run(self, chooser_batch_size=None, interaction_terms=None): alternatives = alternatives[list(alt_cols)] # Callables for iterative choices - def mct(obs, alts, mct_intx_ops=None): + def mct(obs, alts, intx_ops=None): this_mct = MergedChoiceTable( obs, alts, sample_size=self.alt_sample_size, interaction_terms=interaction_terms) - if mct_intx_ops: - mct_df = this_mct.to_frame() - og_mct_index = mct_df.index.names - mct_df.reset_index(inplace=True) - mct_df.index.name = 'mct_index' - - # merges - intx_df = mct_df.copy() - for merge in mct_intx_ops['successive_merges']: - left = intx_df[merge.get('mct_cols', intx_df.columns)] - right = get_data( - merge['right_table'], - extra_columns=merge.get('right_cols', None)) - intx_df = pd.merge( - left, right, - how=merge.get('how', 'inner'), - on=merge.get('on_cols', None), - left_on=merge.get('left_on', None), - right_on=merge.get('right_on', None), - left_index=merge.get('left_index', False), - right_index=merge.get('right_index', False), - suffixes=merge.get('suffixes', ('_x', '_y'))) - - # aggs - aggs = mct_intx_ops.get('aggregations', False) - if aggs: - intx_df = intx_df.groupby('mct_index').agg(aggs) - - # rename cols - if mct_intx_ops.get('rename_cols', False): - intx_df = intx_df.rename( - columns=mct_intx_ops['rename_cols']) - - # update mct - mct_df = pd.merge(mct_df, intx_df, on='mct_index') - - # create new cols from expressions - for new_col, expr in mct_intx_ops.get('eval_ops', {}): - mct_df[new_col] = mct_df.eval(expr) - - mct_df.set_index(og_mct_index, inplace=True) - if mct_df.isna.any(): - print("Replacing Nones and NaNs with 0") - mct_df = mct_df.fillna(0) - this_mct = MergedChoiceTable.from_df(mct_df) + if intx_ops: + this_mct = self.perform_mct_intx_ops(this_mct) return this_mct def probs(mct): return self.model.probabilities(mct) - if (self.constrained_choices == True): + if self.constrained_choices is True: choices = iterative_lottery_choices( observations, alternatives, - mct_callable=mct, probs_callable=probs, + mct_callable=mct, + probs_callable=probs, alt_capacity=self.alt_capacity, chooser_size=self.chooser_size, - max_iter=self.max_iter, chooser_batch_size=chooser_batch_size) + max_iter=self.max_iter, chooser_batch_size=chooser_batch_size, + mct_intx_ops=self.mct_intx_ops) else: - choicetable = mct(observations, alternatives) + choicetable = mct( + observations, alternatives, intx_ops=self.mct_intx_ops) probabilities = probs(choicetable) choices = monte_carlo_choices(probabilities) From 36fd6801ea6f6b729fcb611d15a56a0e5bf2702e Mon Sep 17 00:00:00 2001 From: mxndrwgrdnr Date: Wed, 8 Sep 2021 06:00:22 +0000 Subject: [PATCH 3/7] mct intx ops fixes --- .../models/large_multinomial_logit.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index b27c277..5dbea12 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -243,7 +243,7 @@ def from_dict(cls, d): out_alt_filters=d['out_alt_filters'], constrained_choices=d['constrained_choices'], alt_capacity=d['alt_capacity'], chooser_size=d['chooser_size'], max_iter=d['max_iter'], - mct_intx_ops=d['mct_intx_ops'], name=d['name'], + mct_intx_ops=d.get('mct_intx_ops', None), name=d['name'], tags=d['tags']) # Load model fit data @@ -486,20 +486,20 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): # 
merges intx_df = mct_df.copy() - for merge in intx_ops['successive_merges']: - left = intx_df[merge.get('mct_cols', intx_df.columns)] + for merge, merge_args in intx_ops.get('successive_merges', {}).items(): + left = intx_df[merge_args.get('mct_cols', intx_df.columns)] right = get_data( - merge['right_table'], - extra_columns=merge.get('right_cols', None)) + merge_args['right_table'], + extra_columns=merge_args.get('right_cols', None)) intx_df = pd.merge( left, right, - how=merge.get('how', 'inner'), - on=merge.get('on_cols', None), - left_on=merge.get('left_on', None), - right_on=merge.get('right_on', None), - left_index=merge.get('left_index', False), - right_index=merge.get('right_index', False), - suffixes=merge.get('suffixes', ('_x', '_y'))) + how=merge_args.get('how', 'inner'), + on=merge_args.get('on_cols', None), + left_on=merge_args.get('left_on', None), + right_on=merge_args.get('right_on', None), + left_index=merge_args.get('left_index', False), + right_index=merge_args.get('right_index', False), + suffixes=merge_args.get('suffixes', ('_x', '_y'))) # aggs aggs = intx_ops.get('aggregations', False) @@ -515,7 +515,7 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): mct_df = pd.merge(mct_df, intx_df, on='mct_index') # create new cols from expressions - for new_col, expr in intx_ops.get('eval_ops', {}): + for new_col, expr in intx_ops.get('eval_ops', {}).items(): mct_df[new_col] = mct_df.eval(expr) # restore original mct index From cbba22fb16f174d13c9d68de2b6005fc3474b7e7 Mon Sep 17 00:00:00 2001 From: mxndrwgrdnr Date: Thu, 9 Sep 2021 06:17:01 +0000 Subject: [PATCH 4/7] estimation works --- .../models/large_multinomial_logit.py | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index 5dbea12..4d3f734 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -487,17 +487,37 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): # merges intx_df = mct_df.copy() for merge, merge_args in intx_ops.get('successive_merges', {}).items(): - left = intx_df[merge_args.get('mct_cols', intx_df.columns)] + + # make sure mct index is preserved during merge + left_cols = merge_args.get('mct_cols', intx_df.columns) + left_idx = merge_args.get('left_index', False) + + if intx_df.index.name == mct_df.index.name: + if not left_idx: + intx_df.reset_index(inplace=True) + if mct_df.index.name not in left_cols: + left_cols += [mct_df.index.name] + elif mct_df.index.name in intx_df.columns: + if mct_df.index.name not in left_cols: + left_cols += [mct_df.index.name] + else: + raise KeyError( + 'Column {0} must be preserved in intx ops!'.format( + mct_df.index.name)) + + left = intx_df[left_cols] + right = get_data( merge_args['right_table'], extra_columns=merge_args.get('right_cols', None)) + intx_df = pd.merge( left, right, how=merge_args.get('how', 'inner'), on=merge_args.get('on_cols', None), left_on=merge_args.get('left_on', None), right_on=merge_args.get('right_on', None), - left_index=merge_args.get('left_index', False), + left_index=left_idx, right_index=merge_args.get('right_index', False), suffixes=merge_args.get('suffixes', ('_x', '_y'))) @@ -515,14 +535,16 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): mct_df = pd.merge(mct_df, intx_df, on='mct_index') # create new cols from expressions - for new_col, expr in intx_ops.get('eval_ops', {}).items(): - 
mct_df[new_col] = mct_df.eval(expr) + for new_col, eval_attrs in intx_ops.get('eval_ops', {}).items(): + expr = eval_attrs['expr'] + engine = eval_attrs.get('engine', 'numexpr') + mct_df[new_col] = mct_df.eval(expr, engine=engine) # restore original mct index mct_df.set_index(og_mct_index, inplace=True) # handle NaNs and Nones - if mct_df.isna.any(): + if mct_df.isna().values.any(): if nan_handling == 'zero': print("Replacing MCT None's and NaN's with 0") mct_df = mct_df.fillna(0) From d196aea9aca1f28853fc6547db1242a26f50d875 Mon Sep 17 00:00:00 2001 From: mxndrwgrdnr Date: Thu, 9 Sep 2021 17:54:58 +0000 Subject: [PATCH 5/7] change mct intx ops dict schema --- urbansim_templates/models/large_multinomial_logit.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index 4d3f734..d87a184 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -486,7 +486,7 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): # merges intx_df = mct_df.copy() - for merge, merge_args in intx_ops.get('successive_merges', {}).items(): + for merge_args in intx_ops.get('successive_merges', []): # make sure mct index is preserved during merge left_cols = merge_args.get('mct_cols', intx_df.columns) @@ -535,9 +535,10 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): mct_df = pd.merge(mct_df, intx_df, on='mct_index') # create new cols from expressions - for new_col, eval_attrs in intx_ops.get('eval_ops', {}).items(): - expr = eval_attrs['expr'] - engine = eval_attrs.get('engine', 'numexpr') + for eval_op in intx_ops.get('sequential_eval_ops', []): + new_col = eval_op['name'] + expr = eval_op['expr'] + engine = eval_op.get('engine', 'numexpr') mct_df[new_col] = mct_df.eval(expr, engine=engine) # restore original mct index From 300fa9d9c9a581825dd6e5e8daa94c589e77832f Mon Sep 17 00:00:00 2001 From: mxndrwgrdnr Date: Mon, 13 Sep 2021 00:03:32 +0000 Subject: [PATCH 6/7] update mct_intx_ops schema to include extra alts/obs cols specifciation --- .../models/large_multinomial_logit.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index d87a184..3eba2c7 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -666,16 +666,25 @@ def run(self, chooser_batch_size=None, interaction_terms=None): self.choices = None if interaction_terms is not None: - uniq_intx_idx_names = set( - [idx for intx in interaction_terms for idx in intx.index.names]) + uniq_intx_idx_names = set([ + idx for intx in interaction_terms for idx in intx.index.names]) obs_extra_cols = to_list(self.chooser_size) + \ list(uniq_intx_idx_names) alts_extra_cols = to_list( self.alt_capacity) + list(uniq_intx_idx_names) else: - obs_extra_cols = self.chooser_size - alts_extra_cols = self.alt_capacity + obs_extra_cols = to_list(self.chooser_size) + alts_extra_cols = to_list(self.alt_capacity) + + # get any necessary extra columns from the mct intx operations spec + if self.mct_intx_ops: + intx_extra_obs_cols = self.mct_intx_ops.get('extra_obs_cols', []) + intx_extra_obs_cols = to_list(intx_extra_obs_cols) + obs_extra_cols += intx_extra_obs_cols + intx_extra_alts_cols = self.mct_intx_ops.get('extra_alts_cols', []) + intx_extra_alts_cols = 
to_list(intx_extra_alts_cols) + alts_extra_cols += intx_extra_alts_cols observations = get_data(tables=self.out_choosers, fallback_tables=self.choosers, @@ -717,6 +726,7 @@ def mct(obs, alts, intx_ops=None): if intx_ops: this_mct = self.perform_mct_intx_ops(this_mct) + this_mct.sample_size = self.alt_sample_size return this_mct From 6adbe99a0450b7bed0a7de4dc130d2683422f8f8 Mon Sep 17 00:00:00 2001 From: jdcaicedo251 Date: Thu, 29 Jun 2023 09:20:01 -0700 Subject: [PATCH 7/7] fix df column list data type --- .../models/large_multinomial_logit.py | 101 +++++++++--------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/urbansim_templates/models/large_multinomial_logit.py b/urbansim_templates/models/large_multinomial_logit.py index 3eba2c7..58b0a98 100644 --- a/urbansim_templates/models/large_multinomial_logit.py +++ b/urbansim_templates/models/large_multinomial_logit.py @@ -23,8 +23,8 @@ def check_choicemodels_version(): @modelmanager.template class LargeMultinomialLogitStep(TemplateStep): """ - Class for building standard multinomial logit model steps where alternatives are - interchangeable and all have the same model expression. Supports random sampling of + Class for building standard multinomial logit model steps where alternatives are + interchangeable and all have the same model expression. Supports random sampling of alternatives. Estimation and simulation are performed using ChoiceModels. @@ -36,7 +36,7 @@ class LargeMultinomialLogitStep(TemplateStep): primary one. Any additional tables need to have merge relationships ("broadcasts") specified so that they can be merged unambiguously onto the first table. The index of the primary table should be a unique ID. In this template, the 'choosers' and - 'alternatives' parameters replace the 'tables' parameter. Both are required for + 'alternatives' parameters replace the 'tables' parameter. Both are required for fitting a model, but do not have to be provided when the object is created. Reserved column names: 'chosen'. @@ -45,7 +45,7 @@ class LargeMultinomialLogitStep(TemplateStep): primary one. Any additional tables need to have merge relationships ("broadcasts") specified so that they can be merged unambiguously onto the first table. The index of the primary table should be a unique ID. In this template, the 'choosers' and - 'alternatives' parameters replace the 'tables' parameter. Both are required for + 'alternatives' parameters replace the 'tables' parameter. Both are required for fitting a model, but do not have to be provided when the object is created. Reserved column names: 'chosen'. @@ -62,66 +62,66 @@ class LargeMultinomialLogitStep(TemplateStep): when the object is created. Not required for simulation. chooser_filters : str or list of str, optional - Filters to apply to the chooser data before fitting the model. These are passed to - `pd.DataFrame.query()`. Filters are applied after any additional tables are merged + Filters to apply to the chooser data before fitting the model. These are passed to + `pd.DataFrame.query()`. Filters are applied after any additional tables are merged onto the primary one. Replaces the `fit_filters` argument in UrbanSim. chooser_sample_size : int, optional - Number of choosers to sample, for faster model fitting. Sampling is random and may + Number of choosers to sample, for faster model fitting. Sampling is random and may vary between model runs. alt_filters : str or list of str, optional - Filters to apply to the alternatives data before fitting the model. 
These are - passed to `pd.DataFrame.query()`. Filters are applied after any additional tables + Filters to apply to the alternatives data before fitting the model. These are + passed to `pd.DataFrame.query()`. Filters are applied after any additional tables are merged onto the primary one. Replaces the `fit_filters` argument in UrbanSim. Choosers whose chosen alternative is removed by these filters will not be included in the model estimation. alt_sample_size : int, optional - Numer of alternatives to sample for each choice scenario. For now, only random + Numer of alternatives to sample for each choice scenario. For now, only random sampling is supported. If this parameter is not provided, we will use a sample size of one less than the total number of alternatives. (ChoiceModels codebase currently requires sampling.) The same sample size is used for estimation and prediction. out_choosers : str or list of str, optional - Name(s) of Orca tables to draw choice scenario data from, for simulation. If not - provided, the `choosers` parameter will be used. Same guidance applies. Reserved + Name(s) of Orca tables to draw choice scenario data from, for simulation. If not + provided, the `choosers` parameter will be used. Same guidance applies. Reserved column names: 'chosen', 'join_index', 'observation_id'. out_alternatives : str or list of str, optional - Name(s) of Orca tables containing data about alternatives, for simulation. If not - provided, the `alternatives` parameter will be used. Same guidance applies. + Name(s) of Orca tables containing data about alternatives, for simulation. If not + provided, the `alternatives` parameter will be used. Same guidance applies. Reserved column names: 'chosen', 'join_index', 'observation_id'. out_column : str, optional Name of the column to write simulated choices to. If it does not already exist - in the primary `out_choosers` table, it will be created. If not provided, the - `choice_column` will be used. If the column already exists, choices will be cast - to match its data type. If the column is generated on the fly, it will be given - the same data type as the index of the alternatives table. Replaces the - `out_fname` argument in UrbanSim. + in the primary `out_choosers` table, it will be created. If not provided, the + `choice_column` will be used. If the column already exists, choices will be cast + to match its data type. If the column is generated on the fly, it will be given + the same data type as the index of the alternatives table. Replaces the + `out_fname` argument in UrbanSim. out_chooser_filters : str or list of str, optional - Filters to apply to the chooser data before simulation. If not provided, no + Filters to apply to the chooser data before simulation. If not provided, no filters will be applied. Replaces the `predict_filters` argument in UrbanSim. out_alt_filters : str or list of str, optional - Filters to apply to the alternatives data before simulation. If not provided, no + Filters to apply to the alternatives data before simulation. If not provided, no filters will be applied. Replaces the `predict_filters` argument in UrbanSim. constrained_choices : bool, optional - "True" means alternatives have limited capacity. "False" (default) means that - alternatives can accommodate an unlimited number of choosers. + "True" means alternatives have limited capacity. "False" (default) means that + alternatives can accommodate an unlimited number of choosers. 
alt_capacity : str, optional Name of a column in the out_alternatives table that expresses the capacity of alternatives. If not provided and constrained_choices is True, each alternative - is interpreted as accommodating a single chooser. + is interpreted as accommodating a single chooser. chooser_size : str, optional Name of a column in the out_choosers table that expresses the size of choosers. - Choosers might have varying sizes if the alternative capacities are amounts + Choosers might have varying sizes if the alternative capacities are amounts rather than counts -- e.g. square footage. Chooser sizes must be in the same units as alternative capacities. If not provided and constrained_choices is True, each chooser has a size of 1. @@ -143,12 +143,12 @@ class LargeMultinomialLogitStep(TemplateStep): be treated as read-only. choices : pd.Series - Available after the model step is run. List of chosen alternative id's, indexed - with the chooser id. Does not persist when the model step is reloaded from + Available after the model step is run. List of chosen alternative id's, indexed + with the chooser id. Does not persist when the model step is reloaded from storage. mergedchoicetable : choicemodels.tools.MergedChoiceTable - Table built for estimation or simulation. Does not persist when the model step is + Table built for estimation or simulation. Does not persist when the model step is reloaded from storage. Not available if choices have capacity constraints, because multiple choice tables are generated iteratively. @@ -156,10 +156,10 @@ class LargeMultinomialLogitStep(TemplateStep): Available after a model has been fit. Persists when reloaded from storage. probabilities : pd.Series - Available after the model step is run -- but not if choices have capacity - constraints, which requires probabilities to be calculated multiple times. - Provides list of probabilities corresponding to the sampled alternatives, indexed - with the chooser and alternative id's. Does not persist when the model step is + Available after the model step is run -- but not if choices have capacity + constraints, which requires probabilities to be calculated multiple times. + Provides list of probabilities corresponding to the sampled alternatives, indexed + with the chooser and alternative id's. Does not persist when the model step is reloaded from storage. """ @@ -489,7 +489,7 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): for merge_args in intx_ops.get('successive_merges', []): # make sure mct index is preserved during merge - left_cols = merge_args.get('mct_cols', intx_df.columns) + left_cols = merge_args.get('mct_cols', intx_df.columns.tolist()) left_idx = merge_args.get('left_index', False) if intx_df.index.name == mct_df.index.name: @@ -532,7 +532,10 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): columns=intx_ops['rename_cols']) # update mct - mct_df = pd.merge(mct_df, intx_df, on='mct_index') + mct_df = pd.merge(mct_df, intx_df, on='mct_index', suffixes=('', '_y')) + + # Drop Duplicated Colums if any + mct_df.drop(mct_df.filter(regex='_y$').columns.tolist(),axis=1, inplace=True) # create new cols from expressions for eval_op in intx_ops.get('sequential_eval_ops', []): @@ -557,22 +560,22 @@ def perform_mct_intx_ops(self, mct, nan_handling='zero'): def fit(self, mct=None): """ - Fit the model; save and report results. This uses the ChoiceModels estimation + Fit the model; save and report results. This uses the ChoiceModels estimation engine (originally from UrbanSim MNL). 
The `fit()` method can be run as many times as desired. Results will not be saved with Orca or ModelManager until the `register()` method is run. - After sampling alternatives for each chooser, the merged choice table is saved to + After sampling alternatives for each chooser, the merged choice table is saved to the class object for diagnostic use (`mergedchoicetable` with type choicemodels.tools.MergedChoiceTable). Parameters ---------- mct : choicemodels.tools.MergedChoiceTable - This parameter is a temporary backdoor allowing us to pass in a more - complicated choice table than can be generated within the template, for - example including sampling weights or interaction terms. + This parameter is a temporary backdoor allowing us to pass in a more + complicated choice table than can be generated within the template, for + example including sampling weights or interaction terms. Returns ------- @@ -628,27 +631,27 @@ def run(self, chooser_batch_size=None, interaction_terms=None): """ Run the model step: simulate choices and use them to update an Orca column. - The simulated choices are saved to the class object for diagnostics. If choices - are unconstrained, the choice table and the probabilities of sampled alternatives + The simulated choices are saved to the class object for diagnostics. If choices + are unconstrained, the choice table and the probabilities of sampled alternatives are saved as well. Parameters ---------- chooser_batch_size : int - This parameter gets passed to + This parameter gets passed to choicemodels.tools.simulation.iterative_lottery_choices and is a temporary workaround for dealing with memory issues that arise from generating massive merged choice tables for simulations that involve large numbers of choosers, large numbers of alternatives, and large numbers of predictors. It allows the - user to specify a batch size for simulating choices one chunk at a time. + user to specify a batch size for simulating choices one chunk at a time. interaction_terms : pandas.Series, pandas.DataFrame, or list of either, optional - Additional column(s) of interaction terms whose values depend on the - combination of observation and alternative, to be merged onto the final data - table. If passed as a Series or DataFrame, it should include a two-level - MultiIndex. One level's name and values should match an index or column from - the observations table, and the other should match an index or column from the - alternatives table. + Additional column(s) of interaction terms whose values depend on the + combination of observation and alternative, to be merged onto the final data + table. If passed as a Series or DataFrame, it should include a two-level + MultiIndex. One level's name and values should match an index or column from + the observations table, and the other should match an index or column from the + alternatives table. Returns -------
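
For reference, here is a minimal sketch of the kind of mct_intx_ops spec the patches above consume. In a saved model step it sits under the 'mct_intx_ops' key of the YAML config and reaches Python as a plain dict, which is how it is shown here. Only the keys ('extra_obs_cols', 'extra_alts_cols', 'successive_merges', 'aggregations', 'rename_cols', 'sequential_eval_ops') come from the code above; every table and column name ('jobs', 'households', 'buildings', 'zone_id', 'sector_id', etc.) is a hypothetical placeholder, not part of the patch.

# Hypothetical mct_intx_ops spec -- placeholder table/column names throughout.
mct_intx_ops = {
    # extra columns run() pulls into the alternatives data so the merge below can use them
    'extra_alts_cols': ['zone_id'],

    # 1) successive pandas merges against other Orca tables
    'successive_merges': [
        {
            'right_table': 'jobs',                   # Orca table merged onto the MCT
            'right_cols': ['zone_id', 'sector_id'],  # columns pulled from that table
            'mct_cols': ['zone_id'],                 # columns kept from the MCT side
            'how': 'left',
            'on_cols': 'zone_id',
        },
    ],

    # 2) group-by aggregations back to one row per MCT row ('mct_index')
    'aggregations': {'sector_id': 'count'},

    # 3) column renames applied after aggregation
    'rename_cols': {'sector_id': 'zone_job_count'},

    # 4) new columns computed with DataFrame.eval()
    'sequential_eval_ops': [
        {'name': 'zone_job_count_k', 'expr': 'zone_job_count / 1000'},
    ],
}

step = LargeMultinomialLogitStep(
    choosers='households', alternatives='buildings',  # placeholder tables
    model_expression='zone_job_count_k',              # placeholder expression
    choice_column='building_id',
    alt_sample_size=50,
    mct_intx_ops=mct_intx_ops)

With this spec attached, run() forwards it to the mct() callable defined in the patches, and perform_mct_intx_ops() applies the merges, aggregations, renames, and eval expressions in that order (filling or dropping NaNs per nan_handling) before choice probabilities are computed.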