diff --git a/.travis.yml b/.travis.yml index 799b0ad..420d4cc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,18 +4,13 @@ python: - "2.7" - "3.5" - "3.6" - -matrix: - include: - - python: "3.7" # temp solution until travis supports python 3.7 more cleanly - dist: xenial - sudo: true + - "3.7" + - "3.8" install: - pip install . - pip install -r requirements-dev.txt - - # extra tests run if urbansim is present, but it can't install with python 3.7 - - if [ "$TRAVIS_PYTHON_VERSION" != "3.7" ]; then pip install urbansim; fi + - pip install orca urbansim # extra tests run if urbansim is present - pip list - pip show choicemodels diff --git a/CHANGELOG.md b/CHANGELOG.md index 7623184..80246ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,12 @@ # ChoiceModels change log -### 0.2.2dev0 (2019-04-23) -- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than seqeuentially. +### 0.2.2.dev1 (2020-04-14) + +- adds a `MergedChoiceTable.from_df()` as an alternative constructor + +### 0.2.2.dev0 (2019-04-23) + +- adds a function `choicemodels.tools.parallel_lottery_choices()` to run iterative lottery choice batches in parallel rather than sequentially ### 0.2.1 (2019-01-30) diff --git a/choicemodels/__init__.py b/choicemodels/__init__.py index 487e406..f504701 100644 --- a/choicemodels/__init__.py +++ b/choicemodels/__init__.py @@ -3,4 +3,4 @@ from .mnl import MultinomialLogit, MultinomialLogitResults -version = __version__ = '0.2.2dev0' +version = __version__ = '0.2.2.dev1' diff --git a/choicemodels/tools/mergedchoicetable.py b/choicemodels/tools/mergedchoicetable.py index 7a19fd9..4bc7b31 100644 --- a/choicemodels/tools/mergedchoicetable.py +++ b/choicemodels/tools/mergedchoicetable.py @@ -114,12 +114,6 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, raise ValueError("Cannot sample without replacement with sample_size {} " "and n_alts {}".format(sample_size, alternatives.shape[0])) - if (observations.index.name == None): - observations.index.name = 'obs_id' - - if (alternatives.index.name == None): - alternatives.index.name = 'alt_id' - # TO DO - check that dfs have unique indexes # TO DO - check that chosen_alternatives correspond correctly to other dfs # TO DO - same with weights (could join onto other tables and then split off) @@ -130,14 +124,25 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, observations = observations.drop(chosen_alternatives.name, axis='columns') chosen_alternatives.name = '_' + alternatives.index.name # avoids conflicts - # Check for duplicate column names - obs_cols = list(observations.columns) + list(observations.index.names) - alt_cols = list(alternatives.columns) + list(alternatives.index.names) - dupes = set(obs_cols) & set(alt_cols) + # Allow missing obs and alts, to support .from_df() constructor + if (observations is not None): + + # Provide default names for observation and alternatives id's + + if (observations.index.name == None): + observations.index.name = 'obs_id' + + if (alternatives.index.name == None): + alternatives.index.name = 'alt_id' + + # Check for duplicate column names + obs_cols = list(observations.columns) + list(observations.index.names) + alt_cols = list(alternatives.columns) + list(alternatives.index.names) + dupes = set(obs_cols) & set(alt_cols) - if len(dupes) > 0: - raise ValueError("Both input tables contain column {}. Please ensure " - "column names are unique before merging".format(dupes)) + if len(dupes) > 0: + raise ValueError("Both input tables contain column {}. Please ensure " + "column names are unique before merging".format(dupes)) # Normalize weights to a pd.Series if (weights is not None) & isinstance(weights, str): @@ -172,17 +177,48 @@ def __init__(self, observations, alternatives, chosen_alternatives=None, self.weights_2d = weights_2d # Build choice table... + # Allow missing obs and alts, to support .from_df() constructor + if (observations is not None): - if (len(observations) == 0) or (len(alternatives) == 0): - self._merged_table = pd.DataFrame() + if (len(observations) == 0) or (len(alternatives) == 0): + self._merged_table = pd.DataFrame() - elif (sample_size is None): - self._merged_table = self._build_table_without_sampling() + elif (sample_size is None): + self._merged_table = self._build_table_without_sampling() - else: - self._merged_table = self._build_table() + else: + self._merged_table = self._build_table() + @classmethod + def from_df(cls, df): + """ + Create a MergedChoiceTable instance from a pre-generated DataFrame. + + Each chooser's rows should be contiguous. If applicable, the chosen alternative + should be listed first. This ordering is used by MergedChoiceTable.to_frame(), + and appears to be an undocumented requirement of the legacy MNL code. + + Parameters + ---------- + df : pandas.DataFrame + Table with a two-level MultiIndex where the first level corresponds to the + index of the observations and the second to the index of the alternatives. + May include a binary column named 'chosen' indicating observed choices. + + Returns + ------- + MergedChoiceTable + + """ + obj = cls(observations = None, alternatives = None) + obj._merged_table = df + + # TO DO: sort the dataframe so that rows are automatically in a consistent order + + return obj + + def _merge_interaction_terms(self, df): """ Merges interaction terms (if they exist) onto the input DataFrame. @@ -436,7 +472,7 @@ def observation_id_col(self): str """ - return self.observations.index.name + return self._merged_table.index.names[0] @property @@ -450,7 +486,7 @@ def alternative_id_col(self): str """ - return self.alternatives.index.name + return self._merged_table.index.names[1] @property @@ -464,7 +500,7 @@ def choice_col(self): str or None """ - if (self.chosen_alternatives is not None): + if ('chosen' in self._merged_table.columns): return 'chosen' else: diff --git a/docs/source/index.rst b/docs/source/index.rst index 971c305..c1aab60 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -8,7 +8,7 @@ ChoiceModels ChoiceModels is a Python library for discrete choice modeling, with utilities for sampling, simulation, and other ancillary tasks. It's part of the `Urban Data Science Toolkit `__ (UDST). -v0.2.2dev0, released April 23, 2019 +v0.2.2.dev1, released April 14, 2020 Contents diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 97345ad..0000000 --- a/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -future >= 0.16 -numpy >= 1.14 -pandas >= 0.23 -patsy >= 0.5 -pylogit >= 0.2.2 -scipy >= 1.0 -statsmodels >= 0.8 diff --git a/setup.py b/setup.py index 2741438..4f1491a 100644 --- a/setup.py +++ b/setup.py @@ -4,13 +4,9 @@ with open('README.md', 'r') as f: long_description = f.read() -with open('requirements.txt') as f: - install_requires = f.readlines() -install_requires = [item.strip() for item in install_requires] - setup( name='choicemodels', - version='0.2.2dev0', + version='0.2.2.dev1', description='Tools for discrete choice estimation', long_description=long_description, author='UDST', @@ -23,8 +19,18 @@ 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', 'License :: OSI Approved :: BSD License' ], packages=['choicemodels', 'choicemodels.tools'], - install_requires=install_requires + install_requires=[ + 'numpy >= 1.14', + 'pandas >= 0.23', + 'patsy >= 0.5', + 'pylogit >= 0.2.2', + 'scipy >= 1.0', + 'statsmodels >= 0.8, <0.11; python_version <"3.6"', + 'statsmodels >= 0.8; python_version >="3.6"' + ] ) diff --git a/tests/test_mct.py b/tests/test_mct.py index 9f61f6e..74a3217 100644 --- a/tests/test_mct.py +++ b/tests/test_mct.py @@ -228,3 +228,55 @@ def test_join_key_name_conflict(obs, alts): MergedChoiceTable(obs, alts, chosen_alternatives=alts.index.name) +def test_obs_id_property(obs, alts): + """ + Observation id should be available for a merged table. + + """ + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice') + + assert(mct.observation_id_col == 'oid') + + +def test_alt_id_property(obs, alts): + """ + Alternative id should be available for a merged table. + + """ + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice') + + assert(mct.alternative_id_col == 'aid') + + +def test_choice_col_property(obs, alts): + """ + Choice column property should be present if applicable, or None. + + """ + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice') + assert(mct.choice_col == 'chosen') + + mct = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2) + assert(mct.choice_col == None) + + +def test_from_df(obs, alts): + """ + MCT creation from a dataframe should work smoothly. + + """ + df = choicemodels.tools.MergedChoiceTable(obs, alts, + sample_size = 2, + chosen_alternatives = 'choice').to_frame() + + mct = choicemodels.tools.MergedChoiceTable.from_df(df) + + assert(df.equals(mct.to_frame())) +