From be5ab69467cff9e347f01168b2917f061e02ee02 Mon Sep 17 00:00:00 2001
From: Eddie Janowicz <eddie.janowicz@gmail.com>
Date: Mon, 17 Apr 2017 14:27:55 -0700
Subject: [PATCH 1/2] add relative probability influence method

---
 zone_model/evaluate.py |  5 +++
 zone_model/utils.py    | 91 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 96 insertions(+)

diff --git a/zone_model/evaluate.py b/zone_model/evaluate.py
index ede8bc1..9ec42e6 100644
--- a/zone_model/evaluate.py
+++ b/zone_model/evaluate.py
@@ -35,3 +35,8 @@ def correlate(observed, predicted):
 
     corr = model.score(scoring_function=correlate, aggregate=True)
     print("  Correlation is {}".format(corr))
+
+    relative_probabilities = pd.Series(model.relative_probabilities())
+    print("  Variables by probability influence:")
+    print(relative_probabilities.sort_values(ascending=False))
+    
diff --git a/zone_model/utils.py b/zone_model/utils.py
index ed5281c..8939c02 100644
--- a/zone_model/utils.py
+++ b/zone_model/utils.py
@@ -445,6 +445,97 @@ def score(self, scoring_function=accuracy_score, choosers=None,
 
         return scoring_function(observed_choices, predicted_choices)
 
+    def single_alternative_proba(self, alternative_data, choosers=None,
+                                 alternatives=None):
+        """
+        Probability of a single alternative with user-supplied attributes
+        being selected. For use in diagnostic settings.
+        Parameters
+        ----------
+        alternative_data : dict or pd.Series
+            The single alternative's attributes.  A mapping between variable
+            name and variable value.  Should contain a key for each
+            explanatory variable in the model specification.
+        choosers : pandas.DataFrame, optional
+            DataFrame of choosers.
+        alternatives : pandas.DataFrame, optional
+            DataFrame of alternatives.
+        Returns
+        -------
+        probability : float
+            Probability of alternative with user-supplied characteristics
+            being selected.
+        """
+        if choosers is None or alternatives is None:
+            choosers, alternatives = self.calculate_model_variables()
+
+        alternatives_plus = alternatives.append(alternative_data, 
+                                                ignore_index=True)
+        probabilities = self.calculate_probabilities(choosers, 
+                                                     alternatives_plus)
+
+        probability = probabilities.iloc[-1]
+
+        return probability
+
+    def relative_probabilities(self, low_percentile=.05, high_percentile=.95,
+                               choosers=None, alternatives=None):
+        """
+        Indicator of explanatory variable influence.  For each variable, 
+        calculate relative variable probability contribution by holding all 
+        other variables at their median value and having the variable of 
+        interest take on its 5th and 95th percentile values, then calculating
+        the difference in resulting probabilities.
+        Parameters
+        ----------
+        low_percentile : float, optional
+            Quantile, given as a fraction between 0 and 1, at which the
+            variable of interest is evaluated at the low end of its range.
+        high_percentile : float, optional
+            Quantile, given as a fraction between 0 and 1, at which the
+            variable of interest is evaluated at the high end of its range.
+        choosers : pandas.DataFrame, optional
+            DataFrame of choosers.
+        alternatives : pandas.DataFrame, optional
+            DataFrame of alternatives.
+        Returns
+        -------
+        relative_probabilities : dict
+            Mapping between variable name and it's contribution to 
+            probability.
+        """
+        if choosers is None or alternatives is None:
+            choosers, alternatives = self.calculate_model_variables()
+
+        explanatory_variables = list(self.model_expression)
+        alternatives = alternatives[explanatory_variables]
+
+        relative_probabilities = {}
+        for var_to_measure in explanatory_variables:
+
+            low_percentile_value = alternatives[var_to_measure].quantile(
+                                                               low_percentile)
+            high_percentile_value = alternatives[var_to_measure].quantile(
+                                                              high_percentile)
+
+            constant_vars = [var for var in explanatory_variables if
+                             var != var_to_measure]
+
+            mock_observation = alternatives[constant_vars].median()
+
+            mock_observation[var_to_measure] = high_percentile_value
+            high_proba = self.single_alternative_proba(mock_observation, 
+                                                       choosers, alternatives)
+
+            mock_observation[var_to_measure] = low_percentile_value
+            low_proba = self.single_alternative_proba(mock_observation, 
+                                                      choosers, alternatives)
+
+            proba_difference = high_proba - low_proba
+            relative_probabilities[var_to_measure] = proba_difference
+            
+        return relative_probabilities
+
 
 class SimpleEnsemble(SimulationChoiceModel):
     """

From aec9a991b51d88eb11a121a3d49046de4df1d0d1 Mon Sep 17 00:00:00 2001
From: Eddie Janowicz <eddie.janowicz@gmail.com>
Date: Mon, 17 Apr 2017 14:50:17 -0700
Subject: [PATCH 2/2] pycodestyle formatting fixes

---
 zone_model/evaluate.py |  1 -
 zone_model/utils.py    | 18 +++++++++---------
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/zone_model/evaluate.py b/zone_model/evaluate.py
index 9ec42e6..45f3c78 100644
--- a/zone_model/evaluate.py
+++ b/zone_model/evaluate.py
@@ -39,4 +39,3 @@ def correlate(observed, predicted):
     relative_probabilities = pd.Series(model.relative_probabilities())
     print("  Variables by probability influence:")
     print(relative_probabilities.sort_values(ascending=False))
-    
diff --git a/zone_model/utils.py b/zone_model/utils.py
index 8939c02..df4a11c 100644
--- a/zone_model/utils.py
+++ b/zone_model/utils.py
@@ -469,9 +469,9 @@ def single_alternative_proba(self, alternative_data, choosers=None,
         if choosers is None or alternatives is None:
             choosers, alternatives = self.calculate_model_variables()
 
-        alternatives_plus = alternatives.append(alternative_data, 
+        alternatives_plus = alternatives.append(alternative_data,
                                                 ignore_index=True)
-        probabilities = self.calculate_probabilities(choosers, 
+        probabilities = self.calculate_probabilities(choosers,
                                                      alternatives_plus)
 
         probability = probabilities.iloc[-1]
@@ -481,9 +481,9 @@ def single_alternative_proba(self, alternative_data, choosers=None,
     def relative_probabilities(self, low_percentile=.05, high_percentile=.95,
                                choosers=None, alternatives=None):
         """
-        Indicator of explanatory variable influence.  For each variable, 
-        calculate relative variable probability contribution by holding all 
-        other variables at their median value and having the variable of 
+        Indicator of explanatory variable influence.  For each variable,
+        calculate relative variable probability contribution by holding all
+        other variables at their median value and having the variable of
         interest take on its 5th and 95th percentile values, then calculating
         the difference in resulting probabilities.
         Parameters
@@ -501,7 +501,7 @@ def relative_probabilities(self, low_percentile=.05, high_percentile=.95,
         Returns
         -------
         relative_probabilities : dict
-            Mapping between variable name and it's contribution to 
+            Mapping between variable name and its contribution to
             probability.
         """
         if choosers is None or alternatives is None:
@@ -524,16 +524,16 @@ def relative_probabilities(self, low_percentile=.05, high_percentile=.95,
             mock_observation = alternatives[constant_vars].median()
 
             mock_observation[var_to_measure] = high_percentile_value
-            high_proba = self.single_alternative_proba(mock_observation, 
+            high_proba = self.single_alternative_proba(mock_observation,
                                                        choosers, alternatives)
 
             mock_observation[var_to_measure] = low_percentile_value
-            low_proba = self.single_alternative_proba(mock_observation, 
+            low_proba = self.single_alternative_proba(mock_observation,
                                                       choosers, alternatives)
 
             proba_difference = high_proba - low_proba
             relative_probabilities[var_to_measure] = proba_difference
-            
+
         return relative_probabilities