Skip to content

Commit

Permalink
Merge pull request #67 from UDST/mnl_w_mct_df
Browse files Browse the repository at this point in the history
New MergedChoiceTable feature: `from_df()` construction
  • Loading branch information
mxndrwgrdnr authored Apr 14, 2020
2 parents 54c936d + f1ec684 commit b173353
Show file tree
Hide file tree
Showing 8 changed files with 134 additions and 47 deletions.
11 changes: 3 additions & 8 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,13 @@ python:
- "2.7"
- "3.5"
- "3.6"

matrix:
include:
- python: "3.7" # temp solution until travis supports python 3.7 more cleanly
dist: xenial
sudo: true
- "3.7"
- "3.8"

install:
- pip install .
- pip install -r requirements-dev.txt
- # extra tests run if urbansim is present, but it can't install with python 3.7
- if [ "$TRAVIS_PYTHON_VERSION" != "3.7" ]; then pip install urbansim; fi
- pip install orca urbansim # extra tests run if urbansim is present
- pip list
- pip show choicemodels

Expand Down
9 changes: 7 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# ChoiceModels change log
### 0.2.2dev0 (2019-04-23)

- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than seqeuentially.
### 0.2.2.dev1 (2020-04-14)

- adds `MergedChoiceTable.from_df()` as an alternative constructor

### 0.2.2.dev0 (2019-04-23)

- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than sequentially

### 0.2.1 (2019-01-30)

Expand Down
2 changes: 1 addition & 1 deletion choicemodels/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

from .mnl import MultinomialLogit, MultinomialLogitResults

version = __version__ = '0.2.2dev0'
version = __version__ = '0.2.2.dev1'
80 changes: 58 additions & 22 deletions choicemodels/tools/mergedchoicetable.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,6 @@ def __init__(self, observations, alternatives, chosen_alternatives=None,
raise ValueError("Cannot sample without replacement with sample_size {} "
"and n_alts {}".format(sample_size, alternatives.shape[0]))

if (observations.index.name == None):
observations.index.name = 'obs_id'

if (alternatives.index.name == None):
alternatives.index.name = 'alt_id'

# TO DO - check that dfs have unique indexes
# TO DO - check that chosen_alternatives correspond correctly to other dfs
# TO DO - same with weights (could join onto other tables and then split off)
Expand All @@ -130,14 +124,25 @@ def __init__(self, observations, alternatives, chosen_alternatives=None,
observations = observations.drop(chosen_alternatives.name, axis='columns')
chosen_alternatives.name = '_' + alternatives.index.name # avoids conflicts

# Check for duplicate column names
obs_cols = list(observations.columns) + list(observations.index.names)
alt_cols = list(alternatives.columns) + list(alternatives.index.names)
dupes = set(obs_cols) & set(alt_cols)
# Allow missing obs and alts, to support .from_df() constructor
if (observations is not None):

# Provide default names for observation and alternatives id's

if (observations.index.name == None):
observations.index.name = 'obs_id'

if (alternatives.index.name == None):
alternatives.index.name = 'alt_id'

# Check for duplicate column names
obs_cols = list(observations.columns) + list(observations.index.names)
alt_cols = list(alternatives.columns) + list(alternatives.index.names)
dupes = set(obs_cols) & set(alt_cols)

if len(dupes) > 0:
raise ValueError("Both input tables contain column {}. Please ensure "
"column names are unique before merging".format(dupes))
if len(dupes) > 0:
raise ValueError("Both input tables contain column {}. Please ensure "
"column names are unique before merging".format(dupes))

# Normalize weights to a pd.Series
if (weights is not None) & isinstance(weights, str):
Expand Down Expand Up @@ -172,17 +177,48 @@ def __init__(self, observations, alternatives, chosen_alternatives=None,
self.weights_2d = weights_2d

# Build choice table...
# Allow missing obs and alts, to support .from_df() constructor
if (observations is not None):

if (len(observations) == 0) or (len(alternatives) == 0):
self._merged_table = pd.DataFrame()
if (len(observations) == 0) or (len(alternatives) == 0):
self._merged_table = pd.DataFrame()

elif (sample_size is None):
self._merged_table = self._build_table_without_sampling()
elif (sample_size is None):
self._merged_table = self._build_table_without_sampling()

else:
self._merged_table = self._build_table()
else:
self._merged_table = self._build_table()


@classmethod
def from_df(cls, df):
    """
    Build a MergedChoiceTable around an already-merged DataFrame.

    No sampling or merging is performed: the instance is constructed with no
    observations or alternatives, and `df` is stored as-is (not copied) as the
    merged table.

    Rows belonging to the same chooser should be contiguous and, if applicable,
    the chosen alternative should be listed first. This ordering is used by
    MergedChoiceTable.to_frame(), and appears to be an undocumented requirement
    of the legacy MNL code.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a two-level MultiIndex where the first level corresponds to the
        index of the observations and the second to the index of the alternatives.
        May include a binary column named 'chosen' indicating observed choices.

    Returns
    -------
    MergedChoiceTable

    """
    # Passing None for both tables makes __init__ skip its table-building steps
    mct = cls(observations=None, alternatives=None)

    # TO DO: sort the dataframe so that rows are automatically in a consistent order
    mct._merged_table = df
    return mct


def _merge_interaction_terms(self, df):
"""
Merges interaction terms (if they exist) onto the input DataFrame.
Expand Down Expand Up @@ -436,7 +472,7 @@ def observation_id_col(self):
str
"""
return self.observations.index.name
return self._merged_table.index.names[0]


@property
Expand All @@ -450,7 +486,7 @@ def alternative_id_col(self):
str
"""
return self.alternatives.index.name
return self._merged_table.index.names[1]


@property
Expand All @@ -464,7 +500,7 @@ def choice_col(self):
str or None
"""
if (self.chosen_alternatives is not None):
if ('chosen' in self._merged_table.columns):
return 'chosen'

else:
Expand Down
2 changes: 1 addition & 1 deletion docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ ChoiceModels

ChoiceModels is a Python library for discrete choice modeling, with utilities for sampling, simulation, and other ancillary tasks. It's part of the `Urban Data Science Toolkit <https://docs.udst.org>`__ (UDST).

v0.2.2dev0, released April 23, 2019
v0.2.2.dev1, released April 14, 2020


Contents
Expand Down
7 changes: 0 additions & 7 deletions requirements.txt

This file was deleted.

18 changes: 12 additions & 6 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,9 @@
with open('README.md', 'r') as f:
long_description = f.read()

with open('requirements.txt') as f:
install_requires = f.readlines()
install_requires = [item.strip() for item in install_requires]

setup(
name='choicemodels',
version='0.2.2dev0',
version='0.2.2.dev1',
description='Tools for discrete choice estimation',
long_description=long_description,
author='UDST',
Expand All @@ -23,8 +19,18 @@
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'License :: OSI Approved :: BSD License'
],
packages=['choicemodels', 'choicemodels.tools'],
install_requires=install_requires
install_requires=[
'numpy >= 1.14',
'pandas >= 0.23',
'patsy >= 0.5',
'pylogit >= 0.2.2',
'scipy >= 1.0',
'statsmodels >= 0.8, <0.11; python_version <"3.6"',
'statsmodels >= 0.8; python_version >="3.6"'
]
)
52 changes: 52 additions & 0 deletions tests/test_mct.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,55 @@ def test_join_key_name_conflict(obs, alts):
MergedChoiceTable(obs, alts, chosen_alternatives=alts.index.name)


def test_obs_id_property(obs, alts):
    """
    A sampled merged table should expose the name of its observation id.

    """
    table = choicemodels.tools.MergedChoiceTable(
        obs, alts, sample_size=2, chosen_alternatives='choice')

    expected_name = 'oid'
    assert table.observation_id_col == expected_name


def test_alt_id_property(obs, alts):
    """
    A sampled merged table should expose the name of its alternative id.

    """
    table = choicemodels.tools.MergedChoiceTable(
        obs, alts, sample_size=2, chosen_alternatives='choice')

    expected_name = 'aid'
    assert table.alternative_id_col == expected_name


def test_choice_col_property(obs, alts):
    """
    Choice column property should be 'chosen' when chosen alternatives are
    provided, and None when they are not.

    """
    # With observed choices: the merged table carries a 'chosen' column
    mct = choicemodels.tools.MergedChoiceTable(
        obs, alts, sample_size=2, chosen_alternatives='choice')
    assert mct.choice_col == 'chosen'

    # Without observed choices: the property reports None. Compare by identity,
    # since None is a singleton and `==` can be overridden by other types.
    mct = choicemodels.tools.MergedChoiceTable(obs, alts, sample_size=2)
    assert mct.choice_col is None


def test_from_df(obs, alts):
    """
    A table rebuilt with from_df() should return the same frame it was given.

    """
    # Generate a merged frame through the regular constructor...
    source = choicemodels.tools.MergedChoiceTable(
        obs, alts, sample_size=2, chosen_alternatives='choice')
    df = source.to_frame()

    # ...then wrap it with the alternative constructor and compare the output
    rebuilt = choicemodels.tools.MergedChoiceTable.from_df(df)
    round_tripped = rebuilt.to_frame()

    assert df.equals(round_tripped)

0 comments on commit b173353

Please sign in to comment.