From dbf56aa525ff758edc30e2a128864fddb65ae1f3 Mon Sep 17 00:00:00 2001 From: Max Gardner Date: Fri, 10 Apr 2020 21:11:32 +0000 Subject: [PATCH 01/11] mct from_df classmethod --- choicemodels/tools/mergedchoicetable.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/choicemodels/tools/mergedchoicetable.py b/choicemodels/tools/mergedchoicetable.py index 7a19fd9..475c3b5 100644 --- a/choicemodels/tools/mergedchoicetable.py +++ b/choicemodels/tools/mergedchoicetable.py @@ -183,6 +183,30 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, self._merged_table = self._build_table() + @classmethod + def from_df(cls, df): + """ + Create an object instance from a dataframe + + Parameters + ---------- + df : a Pandas DataFrame object with 1) a MultiIndex in which the + first level corresponds to the index of the observations and the + second to the index of the alternatives; and 2) a binary column + named 'chosen' that indicated whether the corresponding + alternative was chosen in the observation data. + + Returns + ------- + MergedChoiceTable + + """ + obj = cls(pd.DataFrame(), pd.DataFrame()) + obj._merged_table = df + + return obj + + def _merge_interaction_terms(self, df): """ Merges interaction terms (if they exist) onto the input DataFrame. From c625072a72190688b626069777f727c34df7b2ef Mon Sep 17 00:00:00 2001 From: Max Gardner Date: Mon, 13 Apr 2020 17:10:30 +0000 Subject: [PATCH 02/11] class method for mct creation from dataframe --- choicemodels/tools/mergedchoicetable.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/choicemodels/tools/mergedchoicetable.py b/choicemodels/tools/mergedchoicetable.py index 475c3b5..dac4b32 100644 --- a/choicemodels/tools/mergedchoicetable.py +++ b/choicemodels/tools/mergedchoicetable.py @@ -186,22 +186,34 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, @classmethod def from_df(cls, df): """ - Create an object instance from a dataframe + Create an object instance from a dataframe. + + The MergedChoiceTable class requires two dataframes to initialize + representing observations and alternatives for the choice scenario, + so this classmethod simply passes in two empty dataframes. Similarly, + it also passes a dummy for the `chosen_alternatives` arg in order + to trigger the creation of the `MergedChoiceTable.choice_col` property. Parameters ---------- df : a Pandas DataFrame object with 1) a MultiIndex in which the first level corresponds to the index of the observations and the second to the index of the alternatives; and 2) a binary column - named 'chosen' that indicated whether the corresponding + named 'chosen' that indicated whether the corresponding alternative was chosen in the observation data. + + choice_col : Name of the column containing a binary representation + of whether each alternative was chosen in the given choice scenario. Returns ------- MergedChoiceTable """ - obj = cls(pd.DataFrame(), pd.DataFrame()) + obj = cls( + observations=pd.DataFrame(), + alternatives=pd.DataFrame(), + chosen_alternatives=-999) obj._merged_table = df return obj From fb44bd0fed063122ece51b4f99f8d86afdbaea9c Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:21:25 -0700 Subject: [PATCH 03/11] Initial travis cleanup --- .travis.yml | 9 +-------- requirements.txt | 7 ------- setup.py | 10 +++++++++- 3 files changed, 10 insertions(+), 16 deletions(-) delete mode 100644 requirements.txt diff --git a/.travis.yml b/.travis.yml index 799b0ad..c178a19 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,17 +5,10 @@ python: - "3.5" - "3.6" -matrix: - include: - - python: "3.7" # temp solution until travis supports python 3.7 more cleanly - dist: xenial - sudo: true - install: - pip install . - pip install -r requirements-dev.txt - - # extra tests run if urbansim is present, but it can't install with python 3.7 - - if [ "$TRAVIS_PYTHON_VERSION" != "3.7" ]; then pip install urbansim; fi + - pip install urbansim # extra tests run if urbansim is present - pip list - pip show choicemodels diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 97345ad..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -future >= 0.16 -numpy >= 1.14 -pandas >= 0.23 -patsy >= 0.5 -pylogit >= 0.2.2 -scipy >= 1.0 -statsmodels >= 0.8 diff --git a/setup.py b/setup.py index 2741438..dc3e9b2 100644 --- a/setup.py +++ b/setup.py @@ -26,5 +26,13 @@ 'License :: OSI Approved :: BSD License' ], packages=['choicemodels', 'choicemodels.tools'], - install_requires=install_requires + install_requires=[ + 'numpy >= 1.14', + 'pandas >= 0.23', + 'patsy >= 0.5', + 'pylogit >= 0.2.2', + 'scipy >= 1.0', + 'statsmodels >= 0.8, <0.11; python_version <"3.6"', + 'statsmodels >= 0.8; python_version >="3.6"' + ] ) From 21e266c20f72965b2d468d389a18cbffc048368e Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:24:32 -0700 Subject: [PATCH 04/11] Removing references to requirements.txt --- setup.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/setup.py b/setup.py index dc3e9b2..c4db450 100644 --- a/setup.py +++ b/setup.py @@ -4,10 +4,6 @@ with open('README.md', 'r') as f: long_description = f.read() -with open('requirements.txt') as f: - install_requires = f.readlines() -install_requires = [item.strip() for item in install_requires] - setup( name='choicemodels', version='0.2.2dev0', From 499578844e06884ff7440af697337f8b63280b26 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:32:28 -0700 Subject: [PATCH 05/11] Adding orca to correct the hdf install --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index c178a19..f91ee19 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ python: install: - pip install . - pip install -r requirements-dev.txt - - pip install urbansim # extra tests run if urbansim is present + - pip install orca, urbansim # extra tests run if urbansim is present - pip list - pip show choicemodels From 476165d9267daca617153322f24429120c1568e5 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:39:20 -0700 Subject: [PATCH 06/11] Fixing syntax --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f91ee19..43c3246 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,7 +8,7 @@ python: install: - pip install . - pip install -r requirements-dev.txt - - pip install orca, urbansim # extra tests run if urbansim is present + - pip install orca urbansim # extra tests run if urbansim is present - pip list - pip show choicemodels From b01be99eef9120f84a63a5460a250b467e47ffe3 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:42:36 -0700 Subject: [PATCH 07/11] Adding python 3.7 and 3.8 --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 43c3246..420d4cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,8 @@ python: - "2.7" - "3.5" - "3.6" + - "3.7" + - "3.8" install: - pip install . From cdb3419a5415f52cc03389fb078183ffbefd3c45 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:48:23 -0700 Subject: [PATCH 08/11] Updating versioning and changelog --- CHANGELOG.md | 9 +++++++-- choicemodels/__init__.py | 2 +- docs/source/index.rst | 2 +- setup.py | 4 +++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7623184..80246ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,12 @@ # ChoiceModels change log -### 0.2.2dev0 (2019-04-23) -- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than seqeuentially. +### 0.2.2.dev1 (2020-04-14) + +- adds a `MergedChoiceTable.from_df()` as an alternative constructor + +### 0.2.2.dev0 (2019-04-23) + +- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than sequentially ### 0.2.1 (2019-01-30) diff --git a/choicemodels/__init__.py b/choicemodels/__init__.py index 487e406..f504701 100644 --- a/choicemodels/__init__.py +++ b/choicemodels/__init__.py @@ -3,4 +3,4 @@ from .mnl import MultinomialLogit, MultinomialLogitResults -version = __version__ = '0.2.2dev0' +version = __version__ = '0.2.2.dev1' diff --git a/docs/source/index.rst b/docs/source/index.rst index 971c305..c1aab60 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ ChoiceModels ChoiceModels is a Python library for discrete choice modeling, with utilities for sampling, simulation, and other ancillary tasks. It's part of the `Urban Data Science Toolkit `__ (UDST). -v0.2.2dev0, released April 23, 2019 +v0.2.2.dev1, released April 14, 2020 Contents diff --git a/setup.py b/setup.py index c4db450..4f1491a 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='choicemodels', - version='0.2.2dev0', + version='0.2.2.dev1', description='Tools for discrete choice estimation', long_description=long_description, author='UDST', @@ -19,6 +19,8 @@ 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'License :: OSI Approved :: BSD License' ], packages=['choicemodels', 'choicemodels.tools'], From dba2598ccb981cb7e6c2066f2c4d3acbf6a1ea51 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 17:59:48 -0700 Subject: [PATCH 09/11] Adjust constructor and property logic to support .from_df() --- choicemodels/tools/mergedchoicetable.py | 42 +++++++++++++------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/choicemodels/tools/mergedchoicetable.py b/choicemodels/tools/mergedchoicetable.py index dac4b32..95decec 100644 --- a/choicemodels/tools/mergedchoicetable.py +++ b/choicemodels/tools/mergedchoicetable.py @@ -114,12 +114,6 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, raise ValueError("Cannot sample without replacement with sample_size {} " "and n_alts {}".format(sample_size, alternatives.shape[0])) - if (observations.index.name == None): - observations.index.name = 'obs_id' - - if (alternatives.index.name == None): - alternatives.index.name = 'alt_id' - # TO DO - check that dfs have unique indexes # TO DO - check that chosen_alternatives correspond correctly to other dfs # TO DO - same with weights (could join onto other tables and then split off) @@ -130,14 +124,25 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, observations = observations.drop(chosen_alternatives.name, axis='columns') chosen_alternatives.name = '_' + alternatives.index.name # avoids conflicts - # Check for duplicate column names - obs_cols = list(observations.columns) + list(observations.index.names) - alt_cols = list(alternatives.columns) + list(alternatives.index.names) - dupes = set(obs_cols) & set(alt_cols) + # Allow missing obs and alts, to support .from_df() constructor + if (observations is not None): + + # Provide default names for observation and alternatives id's + + if (observations.index.name == None): + observations.index.name = 'obs_id' - if len(dupes) > 0: - raise ValueError("Both input tables contain column {}. Please ensure " - "column names are unique before merging".format(dupes)) + if (alternatives.index.name == None): + alternatives.index.name = 'alt_id' + + # Check for duplicate column names + obs_cols = list(observations.columns) + list(observations.index.names) + alt_cols = list(alternatives.columns) + list(alternatives.index.names) + dupes = set(obs_cols) & set(alt_cols) + + if len(dupes) > 0: + raise ValueError("Both input tables contain column {}. Please ensure " + "column names are unique before merging".format(dupes)) # Normalize weights to a pd.Series if (weights is not None) & isinstance(weights, str): @@ -210,10 +215,7 @@ def from_df(cls, df): MergedChoiceTable """ - obj = cls( - observations=pd.DataFrame(), - alternatives=pd.DataFrame(), - chosen_alternatives=-999) + obj = cls(observations = None, alternatives = None) obj._merged_table = df return obj @@ -472,7 +474,7 @@ def observation_id_col(self): str """ - return self.observations.index.name + return self._merged_table.index.names[0] @property @@ -486,7 +488,7 @@ def alternative_id_col(self): str """ - return self.alternatives.index.name + return self._merged_table.index.names[1] @property @@ -500,7 +502,7 @@ def choice_col(self): str or None """ - if (self.chosen_alternatives is not None): + if ('chosen' in self._merged_table.columns): return 'chosen' else: From a1a50181bac7fa989bbffe4f30cad4645821b053 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 18:15:45 -0700 Subject: [PATCH 10/11] Updating docstrings --- choicemodels/tools/mergedchoicetable.py | 26 +++++++++++-------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/choicemodels/tools/mergedchoicetable.py b/choicemodels/tools/mergedchoicetable.py index 95decec..5b30e6a 100644 --- a/choicemodels/tools/mergedchoicetable.py +++ b/choicemodels/tools/mergedchoicetable.py @@ -191,24 +191,18 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, @classmethod def from_df(cls, df): """ - Create an object instance from a dataframe. + Create a MergedChoiceTable instance from a pre-generated DataFrame. - The MergedChoiceTable class requires two dataframes to initialize - representing observations and alternatives for the choice scenario, - so this classmethod simply passes in two empty dataframes. Similarly, - it also passes a dummy for the `chosen_alternatives` arg in order - to trigger the creation of the `MergedChoiceTable.choice_col` property. + Each chooser's rows should be contiguous. If applicable, the chosen alternative + should be listed first. This ordering is used by MergedChoiceTable.to_frame(), + and appears to be an undocumented requirement of the legacy MNL code. Parameters ---------- - df : a Pandas DataFrame object with 1) a MultiIndex in which the - first level corresponds to the index of the observations and the - second to the index of the alternatives; and 2) a binary column - named 'chosen' that indicated whether the corresponding - alternative was chosen in the observation data. - - choice_col : Name of the column containing a binary representation - of whether each alternative was chosen in the given choice scenario. + df : pandas.DataFrame + Table with a two-level MultiIndex where the first level corresponds to the + index of the observations and the second to the index of the alternatives. + May include a binary column named 'chosen' indicating observed choices. Returns ------- @@ -217,7 +211,9 @@ def from_df(cls, df): """ obj = cls(observations = None, alternatives = None) obj._merged_table = df - + + # TO DO: sort the dataframe so that rows are automatically in a consistent order + return obj From f1ec684bf44858a601006abe42a6935eddc8b35b Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 13 Apr 2020 18:30:21 -0700 Subject: [PATCH 11/11] Adding tests --- choicemodels/tools/mergedchoicetable.py | 14 ++++--- tests/test_mct.py | 52 +++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/choicemodels/tools/mergedchoicetable.py b/choicemodels/tools/mergedchoicetable.py index 5b30e6a..4bc7b31 100644 --- a/choicemodels/tools/mergedchoicetable.py +++ b/choicemodels/tools/mergedchoicetable.py @@ -177,15 +177,17 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, self.weights_2d = weights_2d # Build choice table... + # Allow missing obs and alts, to support .from_df() constructor + if (observations is not None): - if (len(observations) == 0) or (len(alternatives) == 0): - self._merged_table = pd.DataFrame() + if (len(observations) == 0) or (len(alternatives) == 0): + self._merged_table = pd.DataFrame() - elif (sample_size is None): - self._merged_table = self._build_table_without_sampling() + elif (sample_size is None): + self._merged_table = self._build_table_without_sampling() - else: - self._merged_table = self._build_table() + else: + self._merged_table = self._build_table() @classmethod diff --git a/tests/test_mct.py b/tests/test_mct.py index 9f61f6e..74a3217 100644 --- a/tests/test_mct.py +++ b/tests/test_mct.py @@ -228,3 +228,55 @@ def test_join_key_name_conflict(obs, alts): MergedChoiceTable(obs, alts, chosen_alternatives=alts.index.name) +def test_obs_id_property(obs, alts): + """ + Observation id should be available for a merged table. + + """ + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice') + + assert(mct.observation_id_col == 'oid') + + +def test_alt_id_property(obs, alts): + """ + Alternative id should be available for a merged table. + + """ + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice') + + assert(mct.alternative_id_col == 'aid') + + +def test_choice_col_property(obs, alts): + """ + Choice column property should be present if applicable, or None. + + """ + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice') + assert(mct.choice_col == 'chosen') + + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2) + assert(mct.choice_col == None) + + +def test_from_df(obs, alts): + """ + MCT creation from a dataframe should work smoothly. + + """ + df = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice').to_frame() + + mct = choicemodels.tools.MergedChoiceTable.from_df(df) + + assert(df.equals(mct.to_frame())) +