remove deprecated bias scan metrics (#504)
hoffmansc authored Nov 21, 2023
1 parent 94383f8 commit ab7e52a
Showing 6 changed files with 153 additions and 321 deletions.
50 changes: 0 additions & 50 deletions aif360/metrics/mdss_classification_metric.py
@@ -7,7 +7,6 @@
 from aif360.detectors.mdss.MDSS import MDSS
 
 import pandas as pd
-from sklearn.utils.deprecation import deprecated
 
 
 class MDSSClassificationMetric(ClassificationMetric):
@@ -116,52 +115,3 @@ def score_groups(self, privileged=True, penalty=1e-17):
         return scanner.score_current_subset(
             coordinates, expected, outcomes, dict(subset), penalty
         )
-
-    @deprecated('Change to new interface - aif360.detectors.mdss_detector.bias_scan by version 0.5.0.')
-    def bias_scan(self, privileged=True, num_iters=10, penalty=1e-17):
-        """
-        scan to find the highest scoring subset of records
-        :param privileged: flag for group to scan for - privileged group (True) or unprivileged group (False).
-            This abstracts the need to explicitly specify the direction of bias to scan for, which depends on what the favourable label is.
-        :param num_iters: number of iterations (random restarts)
-        :param penalty: penalty term. Should be positive. The penalty term, as with any regularization parameter, may need to be
-            tuned for one's use case. The higher the penalty, the less complex (number of features and feature values) the highest scoring
-            subset that gets returned is.
-        :returns: the highest scoring subset and the score
-        """
-
-        coordinates = pd.DataFrame(
-            self.classified_dataset.features,
-            columns=self.classified_dataset.feature_names,
-        )
-
-        expected = pd.Series(self.classified_dataset.scores.flatten())
-        outcomes = pd.Series(self.dataset.labels.flatten() == self.dataset.favorable_label, dtype=int)
-
-        # In MDSS, we look for a subset whose observations systematically deviate from expectations.
-        # Positive direction means observations are systematically higher than expectations
-        # (or expectations are systematically lower than observations), while
-        # negative direction means observations are systematically lower than expectations
-        # (or expectations are systematically higher than observations).
-
-        # For a privileged group, we are looking for a subset whose expectations
-        # (obtained from a model) are systematically higher than the observations.
-        # This means we scan in the negative direction.
-
-        # For an unprivileged group, we are looking for a subset whose expectations
-        # (obtained from a model) are systematically lower than the observations.
-        # This means we scan in the positive direction.
-
-        self.kwargs['direction'] = "negative" if privileged else "positive"
-
-        if self.scoring == "Bernoulli":
-            scoring_function = Bernoulli(**self.kwargs)
-        elif self.scoring == "BerkJones":
-            scoring_function = BerkJones(**self.kwargs)
-        else:
-            scoring_function = self.scoring(**self.kwargs)
-
-        scanner = MDSS(scoring_function)
-        return scanner.scan(coordinates, expected, outcomes, penalty, num_iters)
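
Migration note: the removed method is superseded by the function-style interface exercised in the notebook changes below. A minimal sketch of an equivalent call on toy data (the data values and the favorable_value=1 choice are illustrative, not from this commit):

import pandas as pd
from aif360.detectors import bias_scan

# Toy records: one feature, observed binary outcomes, model scores.
coordinates = pd.DataFrame({'age_cat': [0, 0, 1, 1]})
observed = pd.Series([1, 0, 1, 1])
expected = pd.Series([0.9, 0.6, 0.4, 0.3])

# overpredicted=True takes over the old privileged=True: scan in the
# "negative" direction, i.e. for a subset whose predictions are
# systematically higher than its observed outcomes.
subset, score = bias_scan(coordinates, observed, expected,
                          favorable_value=1, penalty=0.5,
                          num_iters=10, overpredicted=True)
print(subset, score)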
6 changes: 3 additions & 3 deletions aif360/sklearn/detectors/detectors.py
@@ -1,6 +1,6 @@
 from typing import Union
 
-from aif360.detectors import bias_scan
+from aif360.detectors import bias_scan as _bias_scan
 from aif360.detectors.mdss.ScoringFunctions import ScoringFunction
 
 import pandas as pd
@@ -50,7 +50,7 @@ def bias_scan(
     Returns:
         tuple: The highest scoring subset and the score or dict of the highest scoring subset and the score for each category in nominal mode
     """
-    return bias_scan(
+    return _bias_scan(
         data=X,
         observations=y_true,
         expectations=y_pred,
@@ -60,5 +60,5 @@ def bias_scan(
         num_iters=num_iters,
         penalty=penalty,
         mode=mode,
-        kwargs=kwargs
+        **kwargs
     )
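
The kwargs=kwargs line being replaced was a forwarding bug: it passed the options dict as one keyword literally named kwargs instead of unpacking it into individual keyword arguments. A self-contained illustration with toy functions (not aif360 code):

def inner(**kwargs):
    return kwargs

def forward_wrong(**kwargs):
    # Everything ends up nested under a single 'kwargs' key.
    return inner(kwargs=kwargs)

def forward_right(**kwargs):
    # Each option is forwarded as a real keyword argument.
    return inner(**kwargs)

print(forward_wrong(direction='negative'))  # {'kwargs': {'direction': 'negative'}}
print(forward_right(direction='negative'))  # {'direction': 'negative'}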
91 changes: 17 additions & 74 deletions aif360/sklearn/metrics/metrics.py
@@ -1,5 +1,6 @@
 from itertools import permutations
 from typing import Union
+import warnings
 
 import numpy as np
 import pandas as pd
@@ -9,7 +10,6 @@
 from sklearn.metrics._classification import _prf_divide, _check_zero_division
 from sklearn.neighbors import NearestNeighbors
 from sklearn.utils import check_X_y
-from sklearn.utils.deprecation import deprecated
 
 from aif360.metrics import ot_metric
 from aif360.sklearn.utils import check_inputs, check_groups
@@ -31,7 +31,7 @@
     'equal_opportunity_difference', 'average_odds_difference', 'average_predictive_value_difference',
     'average_odds_error', 'class_imbalance', 'kl_divergence',
     'conditional_demographic_disparity', 'smoothed_edf',
-    'df_bias_amplification', 'mdss_bias_scan', 'mdss_bias_score',
+    'df_bias_amplification', 'mdss_bias_score',
     # individual fairness
     'generalized_entropy_index', 'generalized_entropy_error',
     'between_group_generalized_entropy_error', 'theil_index',
@@ -946,7 +946,7 @@ def df_bias_amplification(y_true, y_pred, *, prot_attr=None, pos_label=1,
     return eps_pred - eps_true
 
 def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
-                    scoring='Bernoulli', privileged=True, penalty=1e-17,
+                    scoring='Bernoulli', overpredicted=True, penalty=1e-17,
                     **kwargs):
     """Compute the bias score for a prespecified group of records using a
     given scoring function.
@@ -966,10 +966,14 @@ def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
         scoring (str or class): One of 'Bernoulli' or 'BerkJones' or
             subclass of
             :class:`aif360.metrics.mdss.ScoringFunctions.ScoringFunction`.
-        privileged (bool): Flag for which direction to scan: privileged
-            (``True``) implies negative (observed worse than predicted outcomes)
-            while unprivileged (``False``) implies positive (observed better
-            than predicted outcomes).
+        overpredicted (bool): Flag for which direction to scan: `True` means we
+            scan for a group whose expectations/predictions are systematically
+            higher than observed. In other words, we scan for a group whose
+            observed outcomes are systematically lower than the expectations.
+            `False` means we scan for a group whose expectations/predictions
+            are systematically lower than observed (observed outcomes are
+            systematically higher than the expectations).
+        privileged (bool): Deprecated. Use overpredicted instead.
         penalty (scalar): Penalty coefficient. Should be positive. The higher
             the penalty, the less complex (number of features and feature
             values) the highest scoring subset that gets returned is.
@@ -991,7 +995,12 @@ def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,
     expected = pd.Series(probas_pred).reset_index(drop=True)
     outcomes = pd.Series(y_true == pos_label, dtype=int).reset_index(drop=True)
 
-    direction = 'negative' if privileged else 'positive'
+    # TODO: DEPRECATED. Remove in next version.
+    if 'privileged' in kwargs:
+        warnings.warn("privileged is deprecated. Use overpredicted instead.",
+                      category=FutureWarning)
+        overpredicted = kwargs['privileged']
+    direction = 'negative' if overpredicted else 'positive'
     kwargs['direction'] = direction
 
     if scoring == 'Bernoulli':
@@ -1004,72 +1013,6 @@ def mdss_bias_score(y_true, probas_pred, X=None, subset=None, *, pos_label=1,

     return scanner.score_current_subset(X, expected, outcomes, subset or {}, penalty)

-@deprecated('Change to new interface - aif360.sklearn.detectors.mdss_detector.bias_scan by version 0.5.0.')
-def mdss_bias_scan(y_true, probas_pred, X=None, *, pos_label=1,
-                   scoring='Bernoulli', privileged=True, n_iter=10,
-                   penalty=1e-17, **kwargs):
-    """Scan to find the highest scoring subset of records.
-    Bias scan is a technique to identify bias in predictive models using subset
-    scanning [#zhang16]_.
-    Args:
-        y_true (array-like): Ground truth (correct) target values.
-        probas_pred (array-like): Probability estimates of the positive class.
-        X (dataframe, optional): The dataset (containing the features) that was
-            used to predict `probas_pred`. If not specified, the subset is
-            returned as indices.
-        pos_label (scalar): Label of the positive class.
-        scoring (str or class): One of 'Bernoulli' or 'BerkJones' or
-            subclass of
-            :class:`aif360.metrics.mdss.ScoringFunctions.ScoringFunction`.
-        privileged (bool): Flag for which direction to scan: privileged
-            (``True``) implies negative (observed worse than predicted outcomes)
-            while unprivileged (``False``) implies positive (observed better
-            than predicted outcomes).
-        n_iter (scalar): Number of iterations (random restarts).
-        penalty (scalar): Penalty coefficient. Should be positive. The higher
-            the penalty, the less complex (number of features and feature
-            values) the highest scoring subset that gets returned is.
-        **kwargs: Additional kwargs to be passed to `scoring` (not including
-            `direction`).
-    Returns:
-        tuple:
-            Highest scoring subset and its bias score
-            * **subset** (dict) -- Mapping of features to values defining the
-              highest scoring subset.
-            * **score** (float) -- Bias score for that group.
-    See also:
-        :func:`mdss_bias_score`
-    References:
-        .. [#zhang16] `Zhang, Z. and Neill, D. B., "Identifying significant
-           predictive bias in classifiers," arXiv preprint, 2016.
-           <https://arxiv.org/abs/1611.08292>`_
-    """
-    if X is None:
-        X = pd.DataFrame({'index': range(len(y_true))})
-    else:
-        X = X.reset_index(drop=True)  # match all indices
-
-    expected = pd.Series(probas_pred).reset_index(drop=True)
-    outcomes = pd.Series(y_true == pos_label, dtype=int).reset_index(drop=True)
-
-    direction = 'negative' if privileged else 'positive'
-    kwargs['direction'] = direction
-    if scoring == 'Bernoulli':
-        scoring_function = Bernoulli(**kwargs)
-    elif scoring == 'BerkJones':
-        scoring_function = BerkJones(**kwargs)
-    else:
-        scoring_function = scoring(**kwargs)
-    scanner = MDSS(scoring_function)
-
-    return scanner.scan(X, expected, outcomes, penalty, n_iter)


 # ========================== INDIVIDUAL FAIRNESS ===============================
 def generalized_entropy_index(b, alpha=2):
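
With this change, callers of mdss_bias_score pass overpredicted instead of privileged; the shim above keeps old call sites working while emitting a FutureWarning. A minimal sketch on toy data (the feature name age_cat, the subset, and all values are illustrative):

import numpy as np
import pandas as pd
from aif360.sklearn.metrics import mdss_bias_score

y_true = np.array([1, 0, 1, 1, 0, 0])
probas = np.array([0.9, 0.4, 0.6, 0.8, 0.3, 0.2])
X = pd.DataFrame({'age_cat': [0, 0, 1, 1, 2, 2]})

# New keyword: score the age_cat == 0 group for overprediction.
score = mdss_bias_score(y_true, probas, X=X, subset={'age_cat': [0]},
                        overpredicted=True, penalty=0.5)

# The old keyword still works via the deprecation shim, with a FutureWarning.
score = mdss_bias_score(y_true, probas, X=X, subset={'age_cat': [0]},
                        privileged=True, penalty=0.5)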
38 changes: 16 additions & 22 deletions examples/demo_mdss_classifier_metric.ipynb
@@ -44,9 +44,8 @@
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"from aif360.metrics import BinaryLabelDatasetMetric \n",
"from aif360.metrics.mdss_classification_metric import MDSSClassificationMetric\n",
"from aif360.detectors.mdss.ScoringFunctions.Bernoulli import Bernoulli\n",
"from aif360.metrics import BinaryLabelDatasetMetric, MDSSClassificationMetric\n",
"from aif360.detectors import bias_scan\n",
"\n",
"from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_compas"
]
@@ -88,9 +87,9 @@
"source": [
"dataset_orig_df = pd.DataFrame(dataset_orig.features, columns=dataset_orig.feature_names)\n",
"\n",
"age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45', \n",
"age_cat = np.argmax(dataset_orig_df[['age_cat=Less than 25', 'age_cat=25 to 45',\n",
" 'age_cat=Greater than 45']].values, axis=1).reshape(-1, 1)\n",
"priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3', \n",
"priors_count = np.argmax(dataset_orig_df[['priors_count=0', 'priors_count=1 to 3',\n",
" 'priors_count=More than 3']].values, axis=1).reshape(-1, 1)\n",
"c_charge_degree = np.argmax(dataset_orig_df[['c_charge_degree=M', 'c_charge_degree=F']].values, axis=1).reshape(-1, 1)\n",
"\n",
@@ -249,12 +248,12 @@
 }
 ],
 "source": [
-"metric_train = BinaryLabelDatasetMetric(dataset_orig_train, \n",
+"metric_train = BinaryLabelDatasetMetric(dataset_orig_train,\n",
 "                                        unprivileged_groups=male_group,\n",
 "                                        privileged_groups=female_group)\n",
 "\n",
 "print(\"Train set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_train.mean_difference())\n",
-"metric_test = BinaryLabelDatasetMetric(dataset_orig_test, \n",
+"metric_test = BinaryLabelDatasetMetric(dataset_orig_test,\n",
 "                                       unprivileged_groups=male_group,\n",
 "                                       privileged_groups=female_group)\n",
 "print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" % metric_test.mean_difference())\n"
@@ -935,19 +934,14 @@
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Function bias_scan is deprecated; Change to new interface - aif360.detectors.mdss_detector.bias_scan by version 0.5.0.\n",
"Function bias_scan is deprecated; Change to new interface - aif360.detectors.mdss_detector.bias_scan by version 0.5.0.\n"
]
}
],
"outputs": [],
"source": [
"privileged_subset = mdss_classified.bias_scan(penalty=0.5, privileged=True)\n",
"unprivileged_subset = mdss_classified.bias_scan(penalty=0.5, privileged=False)"
"privileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n",
" favorable_value=dataset_orig_test.favorable_label,\n",
" penalty=0.5, overpredicted=True)\n",
"unprivileged_subset = bias_scan(df.iloc[:, :-2], df.observed, df.probabilities,\n",
" favorable_value=dataset_orig_test.favorable_label,\n",
" penalty=0.5, overpredicted=False)"
]
},
{
@@ -1024,7 +1018,7 @@
"detected_privileged_groups = []\n",
"for vals in subset_values:\n",
" detected_privileged_groups.append((dict(zip(privileged_subset[0].keys(), vals))))\n",
" \n",
"\n",
"a = list(unprivileged_subset[0].values())\n",
"subset_values = list(itertools.product(*a))\n",
"\n",
@@ -1047,11 +1041,11 @@
 }
 ],
 "source": [
-"metric_bias_test = BinaryLabelDatasetMetric(dataset_bias_test, \n",
+"metric_bias_test = BinaryLabelDatasetMetric(dataset_bias_test,\n",
 "                                            unprivileged_groups=detected_unprivileged_groups,\n",
 "                                            privileged_groups=detected_privileged_groups)\n",
 "\n",
-"print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\" \n",
+"print(\"Test set: Difference in mean outcomes between unprivileged and privileged groups = %f\"\n",
 "      % metric_bias_test.mean_difference())"
 ]
 },
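
For readers following the updated cells: bias_scan returns a (subset, score) tuple, which is why the notebook indexes privileged_subset[0] for the feature-to-values mapping. A hypothetical inspection (the printed values are illustrative only):

subset, score = privileged_subset
print(subset)  # e.g. {'sex': [1.0], 'age_cat': [0.0]} -- the detected group
print(score)   # its bias score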