From 744216d744a1a70fd19faea26f2c80c1b98207a7 Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 11 Apr 2024 14:45:43 +0200 Subject: [PATCH 01/29] Added correlation calculation --- ehrapy/preprocessing/_bias.py | 34 +++++++++++++++++++++++++++++ ehrapy/preprocessing/_imputation.py | 2 +- 2 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 ehrapy/preprocessing/_bias.py diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py new file mode 100644 index 00000000..f686bd9b --- /dev/null +++ b/ehrapy/preprocessing/_bias.py @@ -0,0 +1,34 @@ +from collections.abc import Iterable +from typing import Literal + +from anndata import AnnData + +from ehrapy import logging as logg +from ehrapy.anndata import anndata_to_df + + +def bias_detection(adata: AnnData, sensitive_features: Iterable[str], corr_threshold: float = 0.5): + """Detects bias in the data. + + Args: + adata: An annotated data matrix containing patient data. + sensitive_features: A list of sensitive features to check for bias. + + Returns: + #TODO + """ + correlations = _feature_correlations(adata) + adata.varp["correlation"] = correlations + + +def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] = "pearson"): + """Computes pairwise correlations between features in the AnnData object. + + Args: + adata: An annotated data matrix containing patient data. + + Returns: + A pandas DataFrame containing the correlation matrix. + """ + corr_matrix = anndata_to_df(adata).corr(method=method) + return corr_matrix diff --git a/ehrapy/preprocessing/_imputation.py b/ehrapy/preprocessing/_imputation.py index a98b2beb..24ff6fe6 100644 --- a/ehrapy/preprocessing/_imputation.py +++ b/ehrapy/preprocessing/_imputation.py @@ -208,7 +208,7 @@ def knn_impute( imputation ran successfully. Args: - adata: An annotated data matrix containing gene expression values. + adata: An annotated data matrix containing patient data. var_names: A list of variable names indicating which columns to impute. If `None`, all columns are imputed. Default is `None`. n_neighbours: Number of neighbors to use when performing the imputation. Defaults to 5. From a1e6b2a852e9128950ca3b2ecc8cf769113ec885 Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 11 Apr 2024 17:01:28 +0200 Subject: [PATCH 02/29] Standard. Mean Differences --- ehrapy/preprocessing/_bias.py | 56 ++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index f686bd9b..e2144bf4 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -1,13 +1,21 @@ from collections.abc import Iterable from typing import Literal +import pandas as pd from anndata import AnnData from ehrapy import logging as logg from ehrapy.anndata import anndata_to_df +from ehrapy.tools.feature_ranking._feature_importances import rank_features_supervised -def bias_detection(adata: AnnData, sensitive_features: Iterable[str], corr_threshold: float = 0.5): +def bias_detection( + adata: AnnData, + sensitive_features: Iterable[str], + corr_threshold: float = 0.5, + smd_threshold: float = 0.5, + feature_importance_threshold: float = 0.25, +): """Detects bias in the data. 
Args: @@ -20,6 +28,22 @@ def bias_detection(adata: AnnData, sensitive_features: Iterable[str], corr_thres correlations = _feature_correlations(adata) adata.varp["correlation"] = correlations + for feature in sensitive_features: + if correlations.loc[feature, :].abs().max() > corr_threshold: + logg.warning( + f"Feature {feature} is highly correlated with another feature." + ) # TODO: How do we print results? + + smd = _standardized_mean_differences(adata) + adata.varp["smd"] = smd + for feature in sensitive_features: + for comp_feature in adata.var_names: + if smd.loc[_standardized_mean_differences, feature] > smd_threshold: + logg.warning(f"Feature {comp_feature} has a high standardized mean difference with {feature}.") + + # feature importances + # TODO + def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] = "pearson"): """Computes pairwise correlations between features in the AnnData object. @@ -32,3 +56,33 @@ def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] """ corr_matrix = anndata_to_df(adata).corr(method=method) return corr_matrix + + +def _standardized_mean_differences(adata: AnnData) -> pd.DataFrame: + """Computes the standardized mean differences between sensitive features. + + Args: + adata: An annotated data matrix containing patient data. + sensitive_features: A list of sensitive features to check for bias. + + Returns: + A pandas DataFrame containing the standardized mean differences. + """ + df = anndata_to_df(adata) + smd_results = {} # type: ignore + + for feature1 in df.columns: + smd_results[feature1] = {} + comparison_features = [feature for feature in df.columns if feature != feature1] + + overall_mean = df[comparison_features].mean() + overall_std = df[comparison_features].std() + + group_mean = df.groupby(feature1)[comparison_features].mean() + for feature2 in comparison_features: + smd = (group_mean[feature2] - overall_mean[feature2]) / overall_std[feature2] + smd_results[feature1][feature2] = smd.to_dict() + + smd_results = pd.DataFrame(smd_results).reindex(adata.var_names) + smd_results = smd_results[adata.var_names] + return smd_results From 68b11044cfc2f8388e9e2144057f88ebcdd75c6b Mon Sep 17 00:00:00 2001 From: Lilly Date: Fri, 12 Apr 2024 19:41:12 +0200 Subject: [PATCH 03/29] Added feature importances --- ehrapy/preprocessing/__init__.py | 1 + ehrapy/preprocessing/_bias.py | 86 ++++++++++++------- .../feature_ranking/_feature_importances.py | 24 ++++-- 3 files changed, 73 insertions(+), 38 deletions(-) diff --git a/ehrapy/preprocessing/__init__.py b/ehrapy/preprocessing/__init__.py index f548236d..3b8bae0d 100644 --- a/ehrapy/preprocessing/__init__.py +++ b/ehrapy/preprocessing/__init__.py @@ -1,3 +1,4 @@ +from ehrapy.preprocessing._bias import bias_detection from ehrapy.preprocessing._encoding import encode, undo_encoding from ehrapy.preprocessing._highly_variable_features import highly_variable_features from ehrapy.preprocessing._imputation import ( diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index e2144bf4..ee35f776 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -1,20 +1,21 @@ from collections.abc import Iterable from typing import Literal +import numpy as np import pandas as pd from anndata import AnnData from ehrapy import logging as logg from ehrapy.anndata import anndata_to_df -from ehrapy.tools.feature_ranking._feature_importances import rank_features_supervised def bias_detection( adata: AnnData, - sensitive_features: 
Iterable[str], + sensitive_features: Iterable[str] | Literal["all"], corr_threshold: float = 0.5, smd_threshold: float = 0.5, - feature_importance_threshold: float = 0.25, + feature_importance_threshold: float = 0.01, + prediction_confidence_threshold: float = 0.5, ): """Detects bias in the data. @@ -25,31 +26,55 @@ def bias_detection( Returns: #TODO """ + from ehrapy.tools import rank_features_supervised + + if sensitive_features == "all": + sensitive_features = adata.var_names + correlations = _feature_correlations(adata) adata.varp["correlation"] = correlations for feature in sensitive_features: - if correlations.loc[feature, :].abs().max() > corr_threshold: - logg.warning( - f"Feature {feature} is highly correlated with another feature." - ) # TODO: How do we print results? + for comp_feature in adata.var_names: + if correlations.loc[feature, comp_feature] > corr_threshold: + logg.warning( + f"Feature {feature} is highly correlated with {comp_feature} (correlation coefficient ≈{correlations.loc[feature, comp_feature]:.3f})." + ) # TODO: How do we print results? - smd = _standardized_mean_differences(adata) - adata.varp["smd"] = smd + smd_dict = _standardized_mean_differences(adata, sensitive_features) for feature in sensitive_features: + abs_smd = smd_dict[feature].abs() for comp_feature in adata.var_names: - if smd.loc[_standardized_mean_differences, feature] > smd_threshold: - logg.warning(f"Feature {comp_feature} has a high standardized mean difference with {feature}.") - - # feature importances - # TODO - - -def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] = "pearson"): + if abs_smd[comp_feature].max() > smd_threshold: + logg.warning( + f"Feature {comp_feature} has a high standardized mean difference with {feature}." + ) # TODO: Do we look at / print groups individually? + + for prediction_feature in adata.var_names: + prediction_score = rank_features_supervised( + adata, + prediction_feature, + input_features="all", + model="rf", + key_added=f"{prediction_feature}_feature_importances", + percent_output=True, + logging=False, + return_score=True, + ) + for feature in sensitive_features: + feature_importance = adata.var[f"{prediction_feature}_feature_importances"][feature] / 100 + if feature_importance > feature_importance_threshold and prediction_score > prediction_confidence_threshold: + logg.warning( + f"Feature {feature} has a high feature importance for predicting {prediction_feature} (importance in %: {feature_importance:.3f}, prediction score: {prediction_score:.3f})." + ) + + +def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] = "spearman"): """Computes pairwise correlations between features in the AnnData object. Args: adata: An annotated data matrix containing patient data. + method: The correlation method to use. Choose between "pearson" and "spearman". Defaults to "spearman". Returns: A pandas DataFrame containing the correlation matrix. @@ -58,12 +83,13 @@ def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] return corr_matrix -def _standardized_mean_differences(adata: AnnData) -> pd.DataFrame: +def _standardized_mean_differences(adata: AnnData, features: Iterable[str]) -> dict: """Computes the standardized mean differences between sensitive features. Args: adata: An annotated data matrix containing patient data. - sensitive_features: A list of sensitive features to check for bias. + features: A list of features to compute the standardized mean differences (SMD) for. 
For each listed feature, the SMD is computed for each + feature for all groups within the respected feature. Returns: A pandas DataFrame containing the standardized mean differences. @@ -71,18 +97,18 @@ def _standardized_mean_differences(adata: AnnData) -> pd.DataFrame: df = anndata_to_df(adata) smd_results = {} # type: ignore - for feature1 in df.columns: - smd_results[feature1] = {} - comparison_features = [feature for feature in df.columns if feature != feature1] + for group_feature in features: # TODO: Restrict to categorical features (wait for other PR) + smd_results[group_feature] = {} + for group in df[group_feature].unique(): + group_mean = df[df[group_feature] == group].mean() + group_std = df[df[group_feature] == group].std() + + comparison_mean = df[df[group_feature] != group].mean() + comparison_std = df[df[group_feature] != group].std() - overall_mean = df[comparison_features].mean() - overall_std = df[comparison_features].std() + smd = (group_mean - comparison_mean) / np.sqrt((group_std**2 + comparison_std**2) / 2) + smd_results[group_feature][group] = smd - group_mean = df.groupby(feature1)[comparison_features].mean() - for feature2 in comparison_features: - smd = (group_mean[feature2] - overall_mean[feature2]) / overall_std[feature2] - smd_results[feature1][feature2] = smd.to_dict() + smd_results[group_feature] = pd.DataFrame(smd_results[group_feature]).T[adata.var_names] - smd_results = pd.DataFrame(smd_results).reindex(adata.var_names) - smd_results = smd_results[adata.var_names] return smd_results diff --git a/ehrapy/tools/feature_ranking/_feature_importances.py b/ehrapy/tools/feature_ranking/_feature_importances.py index 79efb779..d93a4b4b 100644 --- a/ehrapy/tools/feature_ranking/_feature_importances.py +++ b/ehrapy/tools/feature_ranking/_feature_importances.py @@ -18,15 +18,17 @@ def rank_features_supervised( adata: AnnData, predicted_feature: str, prediction_type: Literal["continuous", "categorical", "auto"] = "auto", - model: Literal["regression", "svm", "rf"] = "regression", + model: Literal["regression", "svm", "rf"] = "rf", input_features: Iterable[str] | Literal["all"] = "all", layer: str | None = None, test_split_size: float = 0.2, key_added: str = "feature_importances", feature_scaling: Literal["standard", "minmax"] | None = "standard", percent_output: bool = False, + logging: bool = True, + return_score: bool = False, **kwargs, -): +) -> float | None: """Calculate feature importances for predicting a specified feature in adata.var. Args: @@ -49,6 +51,8 @@ def rank_features_supervised( for each feature individually. Defaults to 'standard'. percent_output: Set to True to output the feature importances as percentages. Note that information about positive or negative coefficients for regression models will be lost. Defaults to False. + logging: Set to False to disable logging. Defaults to True. + return_score: Set to True to return the R2 score / the accuracy of the model. Defaults to False. **kwargs: Additional keyword arguments to pass to the model. See the documentation of the respective model in scikit-learn for details. Examples: @@ -92,9 +96,10 @@ def rank_features_supervised( prediction_type = "categorical" else: prediction_type = "continuous" - logg.info( - f"Predicted feature {predicted_feature} was detected as {prediction_type}. If this is incorrect, please specify in the prediction_type argument." - ) + if logging: + logg.info( + f"Predicted feature {predicted_feature} was detected as {prediction_type}. 
If this is incorrect, please specify in the prediction_type argument." + ) elif prediction_type == "continuous": if pd.api.types.is_categorical_dtype(data[predicted_feature].dtype): @@ -167,9 +172,10 @@ def rank_features_supervised( score = predictor.score(x_test, y_test) evaluation_metric = "R2 score" if prediction_type == "continuous" else "accuracy" - logg.info( - f"Training completed. The model achieved an {evaluation_metric} of {score:.2f} on the test set, consisting of {len(y_test)} samples." - ) + if logging: + logg.info( + f"Training completed. The model achieved an {evaluation_metric} of {score:.2f} on the test set, consisting of {len(y_test)} samples." + ) if model == "regression" or model == "svm": feature_importances = pd.Series(predictor.coef_.squeeze(), index=input_data.columns) @@ -182,3 +188,5 @@ def rank_features_supervised( # Reorder feature importances to match adata.var order and save importances in adata.var feature_importances = feature_importances.reindex(adata.var_names) adata.var[key_added] = feature_importances + + return score if return_score else None From 053658600de0c60f709e812e0a904a6046a7f897 Mon Sep 17 00:00:00 2001 From: Lilly Date: Sat, 13 Apr 2024 09:18:23 +0200 Subject: [PATCH 04/29] Doc string improvements --- ehrapy/preprocessing/_bias.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index ee35f776..034601c1 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -14,7 +14,7 @@ def bias_detection( sensitive_features: Iterable[str] | Literal["all"], corr_threshold: float = 0.5, smd_threshold: float = 0.5, - feature_importance_threshold: float = 0.01, + feature_importance_threshold: float = 0.1, prediction_confidence_threshold: float = 0.5, ): """Detects bias in the data. @@ -22,9 +22,12 @@ def bias_detection( Args: adata: An annotated data matrix containing patient data. sensitive_features: A list of sensitive features to check for bias. - - Returns: - #TODO + corr_threshold: The threshold for the correlation coefficient between two features to be considered of interest. Defaults to 0.5. + smd_threshold: The threshold for the standardized mean difference between two features to be considered of interest. Defaults to 0.5. + feature_importance_threshold: The threshold for the feature importance of a sensitive feature for predicting another feature to be considered + of interest. Defaults to 0.1. + prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another + feature to be considered of interest. Defaults to 0.5. """ from ehrapy.tools import rank_features_supervised @@ -89,10 +92,10 @@ def _standardized_mean_differences(adata: AnnData, features: Iterable[str]) -> d Args: adata: An annotated data matrix containing patient data. features: A list of features to compute the standardized mean differences (SMD) for. For each listed feature, the SMD is computed for each - feature for all groups within the respected feature. + feature, comparing one group to the rest. Thus, we obtain a n_groups_in_feature x n_features matrix of SMDs for each listed feature. Returns: - A pandas DataFrame containing the standardized mean differences. + A dictionary mapping each feature to a pandas DataFrame containing the standardized mean differences. 
""" df = anndata_to_df(adata) smd_results = {} # type: ignore From c41ad45c41694441e8236bf7a88baa87fbf788d1 Mon Sep 17 00:00:00 2001 From: Lilly Date: Sat, 13 Apr 2024 09:37:31 +0200 Subject: [PATCH 05/29] Added correlations parameter --- ehrapy/preprocessing/_bias.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 034601c1..a388123e 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -16,6 +16,7 @@ def bias_detection( smd_threshold: float = 0.5, feature_importance_threshold: float = 0.1, prediction_confidence_threshold: float = 0.5, + corr_method: Literal["pearson", "spearman"] = "spearman", ): """Detects bias in the data. @@ -28,13 +29,14 @@ def bias_detection( of interest. Defaults to 0.1. prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another feature to be considered of interest. Defaults to 0.5. + corr_method: The correlation method to use. Choose between "pearson" and "spearman". Defaults to "spearman". """ from ehrapy.tools import rank_features_supervised if sensitive_features == "all": sensitive_features = adata.var_names - correlations = _feature_correlations(adata) + correlations = _feature_correlations(adata, method=corr_method) adata.varp["correlation"] = correlations for feature in sensitive_features: From 97b004bc17b366e60c7210a217cd849b261d727a Mon Sep 17 00:00:00 2001 From: Lilly Date: Mon, 15 Apr 2024 15:47:25 +0200 Subject: [PATCH 06/29] PR Revisions --- ehrapy/preprocessing/__init__.py | 2 +- ehrapy/preprocessing/_bias.py | 178 ++++++++++++++++------------ ehrapy/preprocessing/_imputation.py | 2 +- 3 files changed, 101 insertions(+), 81 deletions(-) diff --git a/ehrapy/preprocessing/__init__.py b/ehrapy/preprocessing/__init__.py index 3b8bae0d..a5d7805e 100644 --- a/ehrapy/preprocessing/__init__.py +++ b/ehrapy/preprocessing/__init__.py @@ -1,4 +1,4 @@ -from ehrapy.preprocessing._bias import bias_detection +from ehrapy.preprocessing._bias import detect_bias from ehrapy.preprocessing._encoding import encode, undo_encoding from ehrapy.preprocessing._highly_variable_features import highly_variable_features from ehrapy.preprocessing._imputation import ( diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index a388123e..b6b1fc10 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -1,3 +1,4 @@ +import itertools from collections.abc import Iterable from typing import Literal @@ -5,24 +6,33 @@ import pandas as pd from anndata import AnnData -from ehrapy import logging as logg from ehrapy.anndata import anndata_to_df -def bias_detection( +def detect_bias( adata: AnnData, sensitive_features: Iterable[str] | Literal["all"], + *, + run_feature_importances: bool | None = None, corr_threshold: float = 0.5, smd_threshold: float = 0.5, feature_importance_threshold: float = 0.1, prediction_confidence_threshold: float = 0.5, corr_method: Literal["pearson", "spearman"] = "spearman", -): - """Detects bias in the data. +) -> dict[str, pd.DataFrame]: + """Detects biases in the data using feature correlations, standardized mean differences, and feature importances. + + Detects biases with respect to sensitive features, which can be either a specified subset of features or all features in adata.var. 
+ The method computes pairwise correlations between features, standardized mean differences between groups of sensitive features, and + feature importances for predicting one feature with another. The results are stored in adata.varp and adata.varm. + Values that exceed the specified thresholds are considered of interest and returned in the results. Args: - adata: An annotated data matrix containing patient data. - sensitive_features: A list of sensitive features to check for bias. + adata: An annotated data matrix containing EHR data. + sensitive_features: Sensitive features to consider for bias detection. If set to "all", all features in adata.var will be considered. + If only a subset of features should be considered, provide as an iterable. + run_feature_importances: Whether to run feature importances for detecting bias. If set to None, the function will run feature importances if + sensitive_features is not set to "all", as this can be computationally expensive. Defaults to None. corr_threshold: The threshold for the correlation coefficient between two features to be considered of interest. Defaults to 0.5. smd_threshold: The threshold for the standardized mean difference between two features to be considered of interest. Defaults to 0.5. feature_importance_threshold: The threshold for the feature importance of a sensitive feature for predicting another feature to be considered @@ -30,90 +40,100 @@ def bias_detection( prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another feature to be considered of interest. Defaults to 0.5. corr_method: The correlation method to use. Choose between "pearson" and "spearman". Defaults to "spearman". + + Returns: + A dictionary containing the results of the bias detection. The keys are: + - "feature_correlations": Pairwise correlations between features that exceed the correlation threshold. + - "standardized_mean_differences": Standardized mean differences between groups of sensitive features that exceed the SMD threshold. + - "feature_importances": Feature importances for predicting one feature with another that exceed the feature importance and prediction + confidence thresholds. """ from ehrapy.tools import rank_features_supervised - if sensitive_features == "all": - sensitive_features = adata.var_names - - correlations = _feature_correlations(adata, method=corr_method) - adata.varp["correlation"] = correlations + bias_results = {} - for feature in sensitive_features: - for comp_feature in adata.var_names: - if correlations.loc[feature, comp_feature] > corr_threshold: - logg.warning( - f"Feature {feature} is highly correlated with {comp_feature} (correlation coefficient ≈{correlations.loc[feature, comp_feature]:.3f})." - ) # TODO: How do we print results? - - smd_dict = _standardized_mean_differences(adata, sensitive_features) - for feature in sensitive_features: - abs_smd = smd_dict[feature].abs() - for comp_feature in adata.var_names: - if abs_smd[comp_feature].max() > smd_threshold: - logg.warning( - f"Feature {comp_feature} has a high standardized mean difference with {feature}." - ) # TODO: Do we look at / print groups individually? 
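# The standardized mean difference used throughout this series compares one group of a
# sensitive feature against all remaining observations, dividing the difference in means by a
# pooled standard deviation: smd = (mean_group - mean_rest) / sqrt((sd_group**2 + sd_rest**2) / 2).
# A minimal, standalone sketch of that computation, assuming only numpy and pandas; the helper
# name and the toy data below are illustrative and not part of ehrapy.
import numpy as np
import pandas as pd


def group_vs_rest_smd(df: pd.DataFrame, group_col: str, value_col: str) -> pd.Series:
    """Standardized mean difference of value_col for each group of group_col vs. all other rows."""
    smds = {}
    for group in df[group_col].unique():
        in_group = df.loc[df[group_col] == group, value_col]
        rest = df.loc[df[group_col] != group, value_col]
        pooled_std = np.sqrt((in_group.std() ** 2 + rest.std() ** 2) / 2)
        smds[group] = (in_group.mean() - rest.mean()) / pooled_std
    return pd.Series(smds)


# Toy example: the SMD of "age" for each "sex" group compared against the remaining rows.
toy = pd.DataFrame({"sex": ["f", "f", "f", "m", "m", "m"], "age": [70.0, 68.0, 72.0, 52.0, 50.0, 55.0]})
print(group_vs_rest_smd(toy, "sex", "age"))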
- - for prediction_feature in adata.var_names: - prediction_score = rank_features_supervised( - adata, - prediction_feature, - input_features="all", - model="rf", - key_added=f"{prediction_feature}_feature_importances", - percent_output=True, - logging=False, - return_score=True, - ) - for feature in sensitive_features: - feature_importance = adata.var[f"{prediction_feature}_feature_importances"][feature] / 100 - if feature_importance > feature_importance_threshold and prediction_score > prediction_confidence_threshold: - logg.warning( - f"Feature {feature} has a high feature importance for predicting {prediction_feature} (importance in %: {feature_importance:.3f}, prediction score: {prediction_score:.3f})." - ) - - -def _feature_correlations(adata: AnnData, method: Literal["pearson", "spearman"] = "spearman"): - """Computes pairwise correlations between features in the AnnData object. - - Args: - adata: An annotated data matrix containing patient data. - method: The correlation method to use. Choose between "pearson" and "spearman". Defaults to "spearman". - - Returns: - A pandas DataFrame containing the correlation matrix. - """ - corr_matrix = anndata_to_df(adata).corr(method=method) - return corr_matrix + if run_feature_importances is None: + run_feature_importances = sensitive_features != "all" + if sensitive_features == "all": + sensitive_features = adata.var_names -def _standardized_mean_differences(adata: AnnData, features: Iterable[str]) -> dict: - """Computes the standardized mean differences between sensitive features. + adata_df = anndata_to_df(adata) - Args: - adata: An annotated data matrix containing patient data. - features: A list of features to compute the standardized mean differences (SMD) for. For each listed feature, the SMD is computed for each - feature, comparing one group to the rest. Thus, we obtain a n_groups_in_feature x n_features matrix of SMDs for each listed feature. + # Feature correlations + correlations = adata_df.corr(method=corr_method) + adata.varp["feature_correlations"] = correlations - Returns: - A dictionary mapping each feature to a pandas DataFrame containing the standardized mean differences. 
- """ - df = anndata_to_df(adata) - smd_results = {} # type: ignore + corr_results = {"Sensitive Feature": [], "Compared Feature": [], "Correlation Coefficient": []} # type: ignore + for sens_feature, comp_feature in itertools.product(sensitive_features, adata.var_names): + if sens_feature == comp_feature: + continue + if abs(correlations.loc[sens_feature, comp_feature]) > corr_threshold: + corr_results["Sensitive Feature"].append(sens_feature) + corr_results["Compared Feature"].append(comp_feature) + corr_results["Correlation Coefficient"].append(correlations.loc[sens_feature, comp_feature]) + bias_results["feature_correlations"] = pd.DataFrame(corr_results) - for group_feature in features: # TODO: Restrict to categorical features (wait for other PR) - smd_results[group_feature] = {} - for group in df[group_feature].unique(): - group_mean = df[df[group_feature] == group].mean() - group_std = df[df[group_feature] == group].std() + # Standardized mean differences + for groupby_feature in sensitive_features: # TODO: Restrict to categorical features (wait for other PR) + smd_results = {} + for group in adata_df[groupby_feature].unique(): + group_mean = adata_df[adata_df[groupby_feature] == group].mean() + group_std = adata_df[adata_df[groupby_feature] == group].std() - comparison_mean = df[df[group_feature] != group].mean() - comparison_std = df[df[group_feature] != group].std() + comparison_mean = adata_df[adata_df[groupby_feature] != group].mean() + comparison_std = adata_df[adata_df[groupby_feature] != group].std() smd = (group_mean - comparison_mean) / np.sqrt((group_std**2 + comparison_std**2) / 2) - smd_results[group_feature][group] = smd + smd_results[group] = smd - smd_results[group_feature] = pd.DataFrame(smd_results[group_feature]).T[adata.var_names] + adata.varm[f"smd_{groupby_feature}"] = pd.DataFrame(smd_results).T[adata.var_names] - return smd_results + smd_results = {"Sensitive Feature": [], "Compared Feature": [], "Group": [], "Standardized Mean Difference": []} + for sens_feature in sensitive_features: + abs_smd = adata.varm[f"smd_{sens_feature}"].abs() + for comp_feature in adata.var_names: + if sens_feature == comp_feature: + continue + if abs_smd[comp_feature].max() > smd_threshold: + smd_results["Sensitive Feature"].append([sens_feature] * len(abs_smd[comp_feature])) + smd_results["Compared Feature"].append([comp_feature] * len(abs_smd[comp_feature])) + smd_results["Group"].append(abs_smd[comp_feature].index.values) + smd_results["Standardized Mean Difference"] = adata.varm[f"smd_{sens_feature}"].values + bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results) + + # Feature importances + if run_feature_importances: + feature_importances_results = { + "Sensitive Feature": [], + "Predicted Feature": [], + "Feature Importance": [], + "Prediction Score": [], + } # type: ignore + for prediction_feature in adata.var_names: + prediction_score = rank_features_supervised( + adata, + prediction_feature, + input_features="all", + model="rf", + key_added=f"{prediction_feature}_feature_importances", + percent_output=True, + logging=False, + return_score=True, + ) + + for sens_feature in sensitive_features: + if prediction_feature == sens_feature: + continue + feature_importance = adata.var[f"{prediction_feature}_feature_importances"][sens_feature] / 100 + if ( + feature_importance > feature_importance_threshold + and prediction_score > prediction_confidence_threshold + ): + feature_importances_results["Sensitive Feature"].append(sens_feature) + 
feature_importances_results["Predicted Feature"].append(prediction_feature) + feature_importances_results["Feature Importance"].append(feature_importance) + feature_importances_results["Prediction Score"].append(prediction_score) + bias_results["feature_importances"] = pd.DataFrame(feature_importances_results) + + return bias_results diff --git a/ehrapy/preprocessing/_imputation.py b/ehrapy/preprocessing/_imputation.py index 9004183e..26a43a0d 100644 --- a/ehrapy/preprocessing/_imputation.py +++ b/ehrapy/preprocessing/_imputation.py @@ -202,7 +202,7 @@ def knn_impute( imputation ran successfully. Args: - adata: An annotated data matrix containing patient data. + adata: An annotated data matrix containing EHR data. var_names: A list of variable names indicating which columns to impute. If `None`, all columns are imputed. Default is `None`. n_neighbours: Number of neighbors to use when performing the imputation. Defaults to 5. From 778c0c3b4a095044fc96ebebd57ed07fc8e14775 Mon Sep 17 00:00:00 2001 From: Lilly Date: Mon, 15 Apr 2024 17:42:34 +0200 Subject: [PATCH 07/29] Added categorical value count calculation --- ehrapy/preprocessing/_bias.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index b6b1fc10..526c379b 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -16,6 +16,7 @@ def detect_bias( run_feature_importances: bool | None = None, corr_threshold: float = 0.5, smd_threshold: float = 0.5, + categorical_factor_threshold: float = 2, feature_importance_threshold: float = 0.1, prediction_confidence_threshold: float = 0.5, corr_method: Literal["pearson", "spearman"] = "spearman", @@ -35,6 +36,8 @@ def detect_bias( sensitive_features is not set to "all", as this can be computationally expensive. Defaults to None. corr_threshold: The threshold for the correlation coefficient between two features to be considered of interest. Defaults to 0.5. smd_threshold: The threshold for the standardized mean difference between two features to be considered of interest. Defaults to 0.5. + categorical_factor_threshold: The threshold for the factor between the value counts (as percentages) of a feature compared between two + groups of a sensitive feature. Defaults to 2. feature_importance_threshold: The threshold for the feature importance of a sensitive feature for predicting another feature to be considered of interest. Defaults to 0.1. 
prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another @@ -102,6 +105,32 @@ def detect_bias( smd_results["Standardized Mean Difference"] = adata.varm[f"smd_{sens_feature}"].values bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results) + # Categorical value counts + cat_value_count_results = { + "Sensitive Feature": [], + "Compared Feature": [], + "Group 1 Percentage": [], + "Group 2 Percentage": [], + } # type: ignore + for sens_feature in sensitive_features: # TODO: Restrict to categorical features (wait for other PR) + for comp_feature in adata.var_names: # TODO: Restrict to categorical features (wait for other PR) + if sens_feature == comp_feature: + continue + value_counts = adata_df.groupby([sens_feature, comp_feature]).size().unstack(fill_value=0) + value_counts = value_counts.div(value_counts.sum(axis=1), axis=0) + + for sens_group in value_counts.index: + for comp_group1, comp_group2 in itertools.combinations(value_counts.columns, 2): + if ( + value_counts.loc[sens_group, comp_group1] / value_counts.loc[sens_group, comp_group2] + > categorical_factor_threshold + ): + cat_value_count_results["Sensitive Feature"].append(sens_feature) + cat_value_count_results["Compared Feature"].append(comp_feature) + cat_value_count_results["Group 1 Percentage"].append(value_counts.loc[sens_group, comp_group1]) + cat_value_count_results["Group 2 Percentage"].append(value_counts.loc[sens_group, comp_group2]) + bias_results["categorical_value_counts"] = pd.DataFrame(cat_value_count_results) + # Feature importances if run_feature_importances: feature_importances_results = { From 7ad07ec1ab7f3bc9b895b208729180d3b1630dfb Mon Sep 17 00:00:00 2001 From: Lilly Date: Tue, 16 Apr 2024 12:01:54 +0200 Subject: [PATCH 08/29] Added first test --- ehrapy/preprocessing/_bias.py | 108 ++++++++++++++++++++----------- tests/preprocessing/test_bias.py | 95 +++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 36 deletions(-) create mode 100644 tests/preprocessing/test_bias.py diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 526c379b..10a00934 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -11,7 +11,7 @@ def detect_bias( adata: AnnData, - sensitive_features: Iterable[str] | Literal["all"], + sensitive_features: Iterable[str] | np.ndarray | Literal["all"], *, run_feature_importances: bool | None = None, corr_threshold: float = 0.5, @@ -59,86 +59,122 @@ def detect_bias( run_feature_importances = sensitive_features != "all" if sensitive_features == "all": - sensitive_features = adata.var_names + sens_features_list = adata.var_names.values.tolist() + categorical_sensitive_features = adata.var_names.values[ + adata.var["feature_type"] == "categorical" + ] # TODO: Double-check that named correctly + else: + for feat in sensitive_features: + if feat not in adata.var_names: + raise ValueError(f"Feature {feat} not found in adata.var.") + sens_features_list = sensitive_features + categorical_sensitive_features = [ + feat for feat in sensitive_features if adata.var["feature_type"][feat] == "categorical" + ] adata_df = anndata_to_df(adata) + categorical_var_names = adata.var_names[adata.var["feature_type"] == "categorical"] # Feature correlations correlations = adata_df.corr(method=corr_method) adata.varp["feature_correlations"] = correlations + print(type(correlations)) - corr_results = {"Sensitive Feature": [], "Compared Feature": [], "Correlation 
Coefficient": []} # type: ignore - for sens_feature, comp_feature in itertools.product(sensitive_features, adata.var_names): + corr_results: dict[str, list] = {"Feature 1": [], "Feature 2": [], "Correlation Coefficient": []} + if sensitive_features == "all": + feature_tuples = list(itertools.combinations(sens_features_list, 2)) + else: + feature_tuples = list(itertools.product(sens_features_list, adata.var_names)) + for sens_feature, comp_feature in feature_tuples: if sens_feature == comp_feature: continue if abs(correlations.loc[sens_feature, comp_feature]) > corr_threshold: - corr_results["Sensitive Feature"].append(sens_feature) - corr_results["Compared Feature"].append(comp_feature) + corr_results["Feature 1"].append(sens_feature) + corr_results["Feature 2"].append(comp_feature) corr_results["Correlation Coefficient"].append(correlations.loc[sens_feature, comp_feature]) bias_results["feature_correlations"] = pd.DataFrame(corr_results) # Standardized mean differences - for groupby_feature in sensitive_features: # TODO: Restrict to categorical features (wait for other PR) - smd_results = {} - for group in adata_df[groupby_feature].unique(): - group_mean = adata_df[adata_df[groupby_feature] == group].mean() - group_std = adata_df[adata_df[groupby_feature] == group].std() + smd_results: dict[str, list] = { + "Sensitive Feature": [], + "Compared Feature": [], + "Group": [], + "Standardized Mean Difference": [], + } # type: ignore + for sens_feature in categorical_sensitive_features: # TODO: Restrict to categorical features (wait for other PR) + alphabetic_groups = sorted(adata_df[sens_feature].unique()) + smd_nparray = np.zeros((len(alphabetic_groups), len(adata.var_names))) + + for group_nr, group in enumerate(alphabetic_groups): + group_mean = adata_df[adata_df[sens_feature] == group].mean() + group_std = adata_df[adata_df[sens_feature] == group].std() - comparison_mean = adata_df[adata_df[groupby_feature] != group].mean() - comparison_std = adata_df[adata_df[groupby_feature] != group].std() + comparison_mean = adata_df[adata_df[sens_feature] != group].mean() + comparison_std = adata_df[adata_df[sens_feature] != group].std() smd = (group_mean - comparison_mean) / np.sqrt((group_std**2 + comparison_std**2) / 2) - smd_results[group] = smd + smd_nparray[group_nr] = smd - adata.varm[f"smd_{groupby_feature}"] = pd.DataFrame(smd_results).T[adata.var_names] + abs_smd = smd.abs() + for i, comp_feature in enumerate(adata.var_names): # TODO: Restrict to continuous features??? 
+ if sens_feature == comp_feature: + continue + if abs_smd[i] > smd_threshold: + smd_results["Sensitive Feature"].append(sens_feature) + smd_results["Compared Feature"].append(comp_feature) + smd_results["Group"].append(group) + smd_results["Standardized Mean Difference"] = smd[i] + + adata.varm[f"smd_{sens_feature}"] = smd_nparray.T # TODO: Double check + # pd.DataFrame(smd_results).T[adata.var_names] # TODO: Sollte ndarray sein - smd_results = {"Sensitive Feature": [], "Compared Feature": [], "Group": [], "Standardized Mean Difference": []} - for sens_feature in sensitive_features: - abs_smd = adata.varm[f"smd_{sens_feature}"].abs() - for comp_feature in adata.var_names: - if sens_feature == comp_feature: - continue - if abs_smd[comp_feature].max() > smd_threshold: - smd_results["Sensitive Feature"].append([sens_feature] * len(abs_smd[comp_feature])) - smd_results["Compared Feature"].append([comp_feature] * len(abs_smd[comp_feature])) - smd_results["Group"].append(abs_smd[comp_feature].index.values) - smd_results["Standardized Mean Difference"] = adata.varm[f"smd_{sens_feature}"].values bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results) # Categorical value counts - cat_value_count_results = { + cat_value_count_results: dict[str, list] = { "Sensitive Feature": [], + "Sensitive Group": [], "Compared Feature": [], + "Group 1": [], + "Group 2": [], "Group 1 Percentage": [], "Group 2 Percentage": [], - } # type: ignore - for sens_feature in sensitive_features: # TODO: Restrict to categorical features (wait for other PR) - for comp_feature in adata.var_names: # TODO: Restrict to categorical features (wait for other PR) + } + for sens_feature in categorical_sensitive_features: # TODO: Restrict to categorical features (wait for other PR) + for comp_feature in categorical_var_names: # TODO: Restrict to categorical features (wait for other PR) if sens_feature == comp_feature: continue value_counts = adata_df.groupby([sens_feature, comp_feature]).size().unstack(fill_value=0) value_counts = value_counts.div(value_counts.sum(axis=1), axis=0) for sens_group in value_counts.index: - for comp_group1, comp_group2 in itertools.combinations(value_counts.columns, 2): + for comp_group1, comp_group2 in itertools.combinations( + value_counts.columns, 2 + ): # TODO: Comp. 
more efficient + value_count_diff = ( + value_counts.loc[sens_group, comp_group1] - value_counts.loc[sens_group, comp_group2] + ) if ( - value_counts.loc[sens_group, comp_group1] / value_counts.loc[sens_group, comp_group2] - > categorical_factor_threshold + value_count_diff > categorical_factor_threshold + or value_count_diff < 1 / categorical_factor_threshold ): cat_value_count_results["Sensitive Feature"].append(sens_feature) + cat_value_count_results["Sensitive Group"].append(sens_group) cat_value_count_results["Compared Feature"].append(comp_feature) + cat_value_count_results["Group 1"].append(comp_group1) + cat_value_count_results["Group 2"].append(comp_group2) cat_value_count_results["Group 1 Percentage"].append(value_counts.loc[sens_group, comp_group1]) cat_value_count_results["Group 2 Percentage"].append(value_counts.loc[sens_group, comp_group2]) bias_results["categorical_value_counts"] = pd.DataFrame(cat_value_count_results) # Feature importances if run_feature_importances: - feature_importances_results = { + feature_importances_results: dict[str, list] = { "Sensitive Feature": [], "Predicted Feature": [], "Feature Importance": [], "Prediction Score": [], - } # type: ignore + } for prediction_feature in adata.var_names: prediction_score = rank_features_supervised( adata, @@ -151,7 +187,7 @@ def detect_bias( return_score=True, ) - for sens_feature in sensitive_features: + for sens_feature in sens_features_list: if prediction_feature == sens_feature: continue feature_importance = adata.var[f"{prediction_feature}_feature_importances"][sens_feature] / 100 diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py new file mode 100644 index 00000000..c8801137 --- /dev/null +++ b/tests/preprocessing/test_bias.py @@ -0,0 +1,95 @@ +import numpy as np +import pandas as pd +import pytest + +import ehrapy as ep + + +@pytest.fixture +def adata(): + corr = np.random.randint(0, 100, 100) + df = pd.DataFrame( + { + "corr1": corr, + "corr2": corr * 2, + "corr3": corr * -1, + "continuous1": np.random.randint(0, 20, 50).tolist() + np.random.randint(20, 40, 50).tolist(), + "cat1": [0] * 50 + [1] * 50, + "cat2": [10] * 10 + [11] * 40 + [10] * 30 + [11] * 20, + } + ) + adata = ep.ad.df_to_anndata(df) + adata.var["feature_type"] = ["continuous"] * 4 + [ + "categorical" + ] * 2 # TODO: Adjust to use variable for name as specified in _constants + return adata + + +def test_detect_bias_all_sens_features(adata): + results = ep.pp.detect_bias(adata, "all", run_feature_importances=True) + + assert "feature_correlations" in results.keys() + feature_corrs = results["feature_correlations"] + assert len(feature_corrs) == 4 + assert ( + feature_corrs[(feature_corrs["Feature 1"] == "corr1") & (feature_corrs["Feature 2"] == "corr2")][ + "Correlation Coefficient" + ].values[0] + == 1 + ) + assert ( + feature_corrs[(feature_corrs["Feature 1"] == "corr1") & (feature_corrs["Feature 2"] == "corr3")][ + "Correlation Coefficient" + ].values[0] + == -1 + ) + assert ( + feature_corrs[(feature_corrs["Feature 1"] == "corr2") & (feature_corrs["Feature 2"] == "corr3")][ + "Correlation Coefficient" + ].values[0] + == -1 + ) + assert ( + feature_corrs[(feature_corrs["Feature 1"] == "continuous1") & (feature_corrs["Feature 2"] == "cat1")][ + "Correlation Coefficient" + ].values[0] + > 0.5 + ) + + assert "standardized_mean_differences" in results.keys() + results["standardized_mean_differences"] + # TODO + + assert "categorical_value_counts" in results.keys() + cat_value_counts = 
results["categorical_value_counts"] + assert len(cat_value_counts) == 4 + assert ( + cat_value_counts[ + (cat_value_counts["Sensitive Feature"] == "cat1") & (cat_value_counts["Sensitive Group"] == 0) + ]["Group 1 Percentage"].values[0] + == 0.2 + ) + assert ( + cat_value_counts[ + (cat_value_counts["Sensitive Feature"] == "cat1") & (cat_value_counts["Sensitive Group"] == 0) + ]["Group 2 Percentage"].values[0] + == 0.8 + ) + + assert "feature_importances" in results.keys() + feat_importances = results["feature_importances"] + assert ( + len(feat_importances) == 7 + ) # 6 for the pairwise correlating features and one for continuous1, which predicts cat1 + assert ( + feat_importances[ + (feat_importances["Sensitive Feature"] == "continuous1") & (feat_importances["Predicted Feature"] == "cat1") + ]["Feature Importance"].values[0] + == 1 + ) + + +def test_detect_bias_specific_sens_features(adata): + ep.pp.detect_bias(adata, ["continuous1", "cat1"], run_feature_importances=True) + + # TODO: Add actual tests From 7d483a320689d78895a4388f381891b3775ddcc5 Mon Sep 17 00:00:00 2001 From: Lilly Date: Tue, 16 Apr 2024 16:27:26 +0200 Subject: [PATCH 09/29] docs clarifications --- ehrapy/preprocessing/_bias.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 10a00934..baaa904c 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -116,7 +116,7 @@ def detect_bias( smd_nparray[group_nr] = smd abs_smd = smd.abs() - for i, comp_feature in enumerate(adata.var_names): # TODO: Restrict to continuous features??? + for i, comp_feature in enumerate(adata.var_names): # TODO: Restrict to continuous features if sens_feature == comp_feature: continue if abs_smd[i] > smd_threshold: @@ -126,7 +126,6 @@ def detect_bias( smd_results["Standardized Mean Difference"] = smd[i] adata.varm[f"smd_{sens_feature}"] = smd_nparray.T # TODO: Double check - # pd.DataFrame(smd_results).T[adata.var_names] # TODO: Sollte ndarray sein bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results) @@ -150,7 +149,7 @@ def detect_bias( for sens_group in value_counts.index: for comp_group1, comp_group2 in itertools.combinations( value_counts.columns, 2 - ): # TODO: Comp. 
more efficient + ): # TODO: Try to find computationally more efficient way value_count_diff = ( value_counts.loc[sens_group, comp_group1] - value_counts.loc[sens_group, comp_group2] ) From 138860bc45412c91ef9bed48209d761510bf270e Mon Sep 17 00:00:00 2001 From: Lilly Date: Tue, 16 Apr 2024 19:16:50 +0200 Subject: [PATCH 10/29] Test improvements --- tests/preprocessing/test_bias.py | 64 ++++++++------------------------ 1 file changed, 15 insertions(+), 49 deletions(-) diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py index c8801137..eab8a7c7 100644 --- a/tests/preprocessing/test_bias.py +++ b/tests/preprocessing/test_bias.py @@ -13,7 +13,7 @@ def adata(): "corr1": corr, "corr2": corr * 2, "corr3": corr * -1, - "continuous1": np.random.randint(0, 20, 50).tolist() + np.random.randint(20, 40, 50).tolist(), + "contin1": np.random.randint(0, 20, 50).tolist() + np.random.randint(20, 40, 50).tolist(), "cat1": [0] * 50 + [1] * 50, "cat2": [10] * 10 + [11] * 40 + [10] * 30 + [11] * 20, } @@ -29,67 +29,33 @@ def test_detect_bias_all_sens_features(adata): results = ep.pp.detect_bias(adata, "all", run_feature_importances=True) assert "feature_correlations" in results.keys() - feature_corrs = results["feature_correlations"] - assert len(feature_corrs) == 4 - assert ( - feature_corrs[(feature_corrs["Feature 1"] == "corr1") & (feature_corrs["Feature 2"] == "corr2")][ - "Correlation Coefficient" - ].values[0] - == 1 - ) - assert ( - feature_corrs[(feature_corrs["Feature 1"] == "corr1") & (feature_corrs["Feature 2"] == "corr3")][ - "Correlation Coefficient" - ].values[0] - == -1 - ) - assert ( - feature_corrs[(feature_corrs["Feature 1"] == "corr2") & (feature_corrs["Feature 2"] == "corr3")][ - "Correlation Coefficient" - ].values[0] - == -1 - ) - assert ( - feature_corrs[(feature_corrs["Feature 1"] == "continuous1") & (feature_corrs["Feature 2"] == "cat1")][ - "Correlation Coefficient" - ].values[0] - > 0.5 - ) + df = results["feature_correlations"] + assert len(df) == 4 + assert df[(df["Feature 1"] == "corr1") & (df["Feature 2"] == "corr2")]["Correlation Coefficient"].values[0] == 1 + assert df[(df["Feature 1"] == "corr1") & (df["Feature 2"] == "corr3")]["Correlation Coefficient"].values[0] == -1 + assert df[(df["Feature 1"] == "corr2") & (df["Feature 2"] == "corr3")]["Correlation Coefficient"].values[0] == -1 + assert df[(df["Feature 1"] == "contin1") & (df["Feature 2"] == "cat1")]["Correlation Coefficient"].values[0] > 0.5 assert "standardized_mean_differences" in results.keys() results["standardized_mean_differences"] # TODO assert "categorical_value_counts" in results.keys() - cat_value_counts = results["categorical_value_counts"] - assert len(cat_value_counts) == 4 - assert ( - cat_value_counts[ - (cat_value_counts["Sensitive Feature"] == "cat1") & (cat_value_counts["Sensitive Group"] == 0) - ]["Group 1 Percentage"].values[0] - == 0.2 - ) - assert ( - cat_value_counts[ - (cat_value_counts["Sensitive Feature"] == "cat1") & (cat_value_counts["Sensitive Group"] == 0) - ]["Group 2 Percentage"].values[0] - == 0.8 - ) + df = results["categorical_value_counts"] + assert len(df) == 4 + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Group 1 Percentage"].values[0] == 0.2 + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Group 2 Percentage"].values[0] == 0.8 assert "feature_importances" in results.keys() - feat_importances = results["feature_importances"] - assert ( - len(feat_importances) == 7 - ) # 6 for the pairwise 
correlating features and one for continuous1, which predicts cat1 + df = results["feature_importances"] + assert len(df) == 7 # 6 for the pairwise correlating features and one for contin1, which predicts cat1 assert ( - feat_importances[ - (feat_importances["Sensitive Feature"] == "continuous1") & (feat_importances["Predicted Feature"] == "cat1") - ]["Feature Importance"].values[0] + df[(df["Sensitive Feature"] == "contin1") & (df["Predicted Feature"] == "cat1")]["Feature Importance"].values[0] == 1 ) def test_detect_bias_specific_sens_features(adata): - ep.pp.detect_bias(adata, ["continuous1", "cat1"], run_feature_importances=True) + ep.pp.detect_bias(adata, ["contin1", "cat1"], run_feature_importances=True) # TODO: Add actual tests From c0bdcb11506ffc51278660a95bb110ea106fb1f2 Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 25 Apr 2024 11:11:49 +0200 Subject: [PATCH 11/29] Incorporate feature type detection --- ehrapy/preprocessing/_bias.py | 68 +++++++++++++++++++------------- tests/preprocessing/test_bias.py | 34 +++++++++------- 2 files changed, 61 insertions(+), 41 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index baaa904c..80e9b420 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -6,9 +6,11 @@ import pandas as pd from anndata import AnnData -from ehrapy.anndata import anndata_to_df +from ehrapy.anndata import anndata_to_df, check_feature_types +from ehrapy.anndata._constants import CATEGORICAL_TAG, CONTINUOUS_TAG, DATE_TAG, FEATURE_TYPE_KEY +@check_feature_types def detect_bias( adata: AnnData, sensitive_features: Iterable[str] | np.ndarray | Literal["all"], @@ -60,27 +62,26 @@ def detect_bias( if sensitive_features == "all": sens_features_list = adata.var_names.values.tolist() - categorical_sensitive_features = adata.var_names.values[ - adata.var["feature_type"] == "categorical" - ] # TODO: Double-check that named correctly + cat_sens_features = adata.var_names.values[adata.var[FEATURE_TYPE_KEY] == CATEGORICAL_TAG] else: for feat in sensitive_features: if feat not in adata.var_names: raise ValueError(f"Feature {feat} not found in adata.var.") sens_features_list = sensitive_features - categorical_sensitive_features = [ - feat for feat in sensitive_features if adata.var["feature_type"][feat] == "categorical" + cat_sens_features = [ + feat for feat in sensitive_features if adata.var[FEATURE_TYPE_KEY][feat] == CATEGORICAL_TAG ] adata_df = anndata_to_df(adata) - categorical_var_names = adata.var_names[adata.var["feature_type"] == "categorical"] + # categorical_var_names = adata.var_names[adata.var["feature_type"] == "categorical"] + # -------------------- # Feature correlations + # -------------------- correlations = adata_df.corr(method=corr_method) adata.varp["feature_correlations"] = correlations - print(type(correlations)) - corr_results: dict[str, list] = {"Feature 1": [], "Feature 2": [], "Correlation Coefficient": []} + corr_results: dict[str, list] = {"Feature 1": [], "Feature 2": [], f"{corr_method.capitalize()} CC": []} if sensitive_features == "all": feature_tuples = list(itertools.combinations(sens_features_list, 2)) else: @@ -91,45 +92,55 @@ def detect_bias( if abs(correlations.loc[sens_feature, comp_feature]) > corr_threshold: corr_results["Feature 1"].append(sens_feature) corr_results["Feature 2"].append(comp_feature) - corr_results["Correlation Coefficient"].append(correlations.loc[sens_feature, comp_feature]) + corr_results[f"{corr_method.capitalize()} CC"].append(correlations.loc[sens_feature, 
comp_feature]) bias_results["feature_correlations"] = pd.DataFrame(corr_results) + # ----------------------------- # Standardized mean differences + # ----------------------------- smd_results: dict[str, list] = { "Sensitive Feature": [], "Compared Feature": [], "Group": [], "Standardized Mean Difference": [], - } # type: ignore - for sens_feature in categorical_sensitive_features: # TODO: Restrict to categorical features (wait for other PR) - alphabetic_groups = sorted(adata_df[sens_feature].unique()) - smd_nparray = np.zeros((len(alphabetic_groups), len(adata.var_names))) + } + continuous_var_names = adata.var_names[adata.var[FEATURE_TYPE_KEY] == CONTINUOUS_TAG] + for sens_feature in cat_sens_features: + sens_feature_groups = sorted(adata_df[sens_feature].unique()) + smd_nparray = np.zeros((len(sens_feature_groups), len(continuous_var_names))) - for group_nr, group in enumerate(alphabetic_groups): - group_mean = adata_df[adata_df[sens_feature] == group].mean() - group_std = adata_df[adata_df[sens_feature] == group].std() + for group_nr, group in enumerate(sens_feature_groups): + # Compute SMD for all continuous features between the sensitive group and all other observations + group_mean = adata_df[continuous_var_names][adata_df[sens_feature] == group].mean() + group_std = adata_df[continuous_var_names][adata_df[sens_feature] == group].std() - comparison_mean = adata_df[adata_df[sens_feature] != group].mean() - comparison_std = adata_df[adata_df[sens_feature] != group].std() + comparison_mean = adata_df[continuous_var_names][adata_df[sens_feature] != group].mean() + comparison_std = adata_df[continuous_var_names][adata_df[sens_feature] != group].std() smd = (group_mean - comparison_mean) / np.sqrt((group_std**2 + comparison_std**2) / 2) smd_nparray[group_nr] = smd abs_smd = smd.abs() - for i, comp_feature in enumerate(adata.var_names): # TODO: Restrict to continuous features - if sens_feature == comp_feature: - continue - if abs_smd[i] > smd_threshold: + for comp_feature_nr, comp_feature in enumerate( + [continuous_var_names] + ): # TODO: Restrict to continuous features + # if sens_feature == comp_feature: + # continue + if abs_smd[comp_feature_nr] > smd_threshold: smd_results["Sensitive Feature"].append(sens_feature) smd_results["Compared Feature"].append(comp_feature) smd_results["Group"].append(group) - smd_results["Standardized Mean Difference"] = smd[i] + smd_results["Standardized Mean Difference"] = smd[comp_feature_nr] - adata.varm[f"smd_{sens_feature}"] = smd_nparray.T # TODO: Double check + adata.uns[f"smd_{sens_feature}"] = ( + smd_nparray.T + ) # TODO: Double check; also, this is not very informative without row names... 
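# The TODO above notes that the raw ndarray stored per sensitive feature carries no row or
# column labels. One hedged option, assuming the sens_feature_groups and continuous_var_names
# computed earlier, is to wrap the matrix in a labelled DataFrame so groups and features stay
# attached to the values; the names below are stand-ins, not the ehrapy API.
import numpy as np
import pandas as pd

groups = ["female", "male"]              # stand-in for sorted(adata_df[sens_feature].unique())
features = ["age", "bmi", "heart_rate"]  # stand-in for continuous_var_names
smd_matrix = np.array([[0.9, -0.2, 0.4], [-0.9, 0.2, -0.4]])  # one row per group, one column per feature

labelled_smd = pd.DataFrame(smd_matrix.T, index=features, columns=groups)  # same orientation as smd_nparray.T
print(labelled_smd.loc["bmi", "female"])  # SMD of bmi: "female" group vs. the rest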
bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results) + # ------------------------ # Categorical value counts + # ------------------------ cat_value_count_results: dict[str, list] = { "Sensitive Feature": [], "Sensitive Group": [], @@ -139,8 +150,9 @@ def detect_bias( "Group 1 Percentage": [], "Group 2 Percentage": [], } - for sens_feature in categorical_sensitive_features: # TODO: Restrict to categorical features (wait for other PR) - for comp_feature in categorical_var_names: # TODO: Restrict to categorical features (wait for other PR) + cat_var_names = adata.var_names[adata.var[FEATURE_TYPE_KEY] == CATEGORICAL_TAG] + for sens_feature in cat_sens_features: + for comp_feature in cat_var_names: if sens_feature == comp_feature: continue value_counts = adata_df.groupby([sens_feature, comp_feature]).size().unstack(fill_value=0) @@ -166,7 +178,9 @@ def detect_bias( cat_value_count_results["Group 2 Percentage"].append(value_counts.loc[sens_group, comp_group2]) bias_results["categorical_value_counts"] = pd.DataFrame(cat_value_count_results) + # ------------------- # Feature importances + # ------------------- if run_feature_importances: feature_importances_results: dict[str, list] = { "Sensitive Feature": [], diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py index eab8a7c7..49c2f1a3 100644 --- a/tests/preprocessing/test_bias.py +++ b/tests/preprocessing/test_bias.py @@ -3,6 +3,7 @@ import pytest import ehrapy as ep +from ehrapy.anndata._constants import CATEGORICAL_TAG, CONTINUOUS_TAG, FEATURE_TYPE_KEY @pytest.fixture @@ -19,25 +20,26 @@ def adata(): } ) adata = ep.ad.df_to_anndata(df) - adata.var["feature_type"] = ["continuous"] * 4 + [ - "categorical" - ] * 2 # TODO: Adjust to use variable for name as specified in _constants + adata.var[FEATURE_TYPE_KEY] = [CONTINUOUS_TAG] * 4 + [CATEGORICAL_TAG] * 2 return adata def test_detect_bias_all_sens_features(adata): - results = ep.pp.detect_bias(adata, "all", run_feature_importances=True) + results = ep.pp.detect_bias( + adata, "all", run_feature_importances=True, corr_method="spearman", feature_importance_threshold=0.4 + ) assert "feature_correlations" in results.keys() df = results["feature_correlations"] assert len(df) == 4 - assert df[(df["Feature 1"] == "corr1") & (df["Feature 2"] == "corr2")]["Correlation Coefficient"].values[0] == 1 - assert df[(df["Feature 1"] == "corr1") & (df["Feature 2"] == "corr3")]["Correlation Coefficient"].values[0] == -1 - assert df[(df["Feature 1"] == "corr2") & (df["Feature 2"] == "corr3")]["Correlation Coefficient"].values[0] == -1 - assert df[(df["Feature 1"] == "contin1") & (df["Feature 2"] == "cat1")]["Correlation Coefficient"].values[0] > 0.5 + assert df[(df["Feature 1"] == "corr1") & (df["Feature 2"] == "corr2")]["Spearman CC"].values[0] == 1 + assert df[(df["Feature 1"] == "corr1") & (df["Feature 2"] == "corr3")]["Spearman CC"].values[0] == -1 + assert df[(df["Feature 1"] == "corr2") & (df["Feature 2"] == "corr3")]["Spearman CC"].values[0] == -1 + assert df[(df["Feature 1"] == "contin1") & (df["Feature 2"] == "cat1")]["Spearman CC"].values[0] > 0.5 assert "standardized_mean_differences" in results.keys() - results["standardized_mean_differences"] + df = results["standardized_mean_differences"] + print(df) # TODO assert "categorical_value_counts" in results.keys() @@ -49,13 +51,17 @@ def test_detect_bias_all_sens_features(adata): assert "feature_importances" in results.keys() df = results["feature_importances"] assert len(df) == 7 # 6 for the pairwise 
correlating features and one for contin1, which predicts cat1 - assert ( - df[(df["Sensitive Feature"] == "contin1") & (df["Predicted Feature"] == "cat1")]["Feature Importance"].values[0] - == 1 - ) def test_detect_bias_specific_sens_features(adata): - ep.pp.detect_bias(adata, ["contin1", "cat1"], run_feature_importances=True) + results = ep.pp.detect_bias( + adata, + ["contin1", "cat1"], + run_feature_importances=True, + corr_method="spearman", + feature_importance_threshold=0.4, + ) + assert "feature_correlations" in results.keys() + results["feature_correlations"] # TODO: Add actual tests From 031808def002c6e84df7c08be6cd8fd3a98dc76a Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 25 Apr 2024 11:47:29 +0200 Subject: [PATCH 12/29] Finished tests --- ehrapy/preprocessing/_bias.py | 8 ++--- tests/preprocessing/test_bias.py | 52 +++++++++++++++++++++++++++++--- 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 80e9b420..f24f385b 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -100,8 +100,8 @@ def detect_bias( # ----------------------------- smd_results: dict[str, list] = { "Sensitive Feature": [], + "Sensitive Group": [], "Compared Feature": [], - "Group": [], "Standardized Mean Difference": [], } continuous_var_names = adata.var_names[adata.var[FEATURE_TYPE_KEY] == CONTINUOUS_TAG] @@ -122,15 +122,15 @@ def detect_bias( abs_smd = smd.abs() for comp_feature_nr, comp_feature in enumerate( - [continuous_var_names] + continuous_var_names ): # TODO: Restrict to continuous features # if sens_feature == comp_feature: # continue if abs_smd[comp_feature_nr] > smd_threshold: smd_results["Sensitive Feature"].append(sens_feature) + smd_results["Sensitive Group"].append(group) smd_results["Compared Feature"].append(comp_feature) - smd_results["Group"].append(group) - smd_results["Standardized Mean Difference"] = smd[comp_feature_nr] + smd_results["Standardized Mean Difference"].append(smd[comp_feature_nr]) adata.uns[f"smd_{sens_feature}"] = ( smd_nparray.T diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py index 49c2f1a3..4bb610ee 100644 --- a/tests/preprocessing/test_bias.py +++ b/tests/preprocessing/test_bias.py @@ -39,8 +39,27 @@ def test_detect_bias_all_sens_features(adata): assert "standardized_mean_differences" in results.keys() df = results["standardized_mean_differences"] - print(df) - # TODO + assert len(df) == 4 # Both groups of cat1, cat2 respectively show a high SMD with contin1 + assert ( + df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Standardized Mean Difference"].values[0] + < -1 + ) + assert ( + df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 1)]["Standardized Mean Difference"].values[0] + > 1 + ) + assert ( + df[(df["Sensitive Feature"] == "cat2") & (df["Sensitive Group"] == 10)]["Standardized Mean Difference"].values[ + 0 + ] + > 1 + ) + assert ( + df[(df["Sensitive Feature"] == "cat2") & (df["Sensitive Group"] == 11)]["Standardized Mean Difference"].values[ + 0 + ] + < -1 + ) assert "categorical_value_counts" in results.keys() df = results["categorical_value_counts"] @@ -50,7 +69,7 @@ def test_detect_bias_all_sens_features(adata): assert "feature_importances" in results.keys() df = results["feature_importances"] - assert len(df) == 7 # 6 for the pairwise correlating features and one for contin1, which predicts cat1 + assert len(df) >= 7 # 6 for the pairwise correlating features and one/two for 
contin1, which predicts cat1 def test_detect_bias_specific_sens_features(adata): @@ -63,5 +82,28 @@ def test_detect_bias_specific_sens_features(adata): ) assert "feature_correlations" in results.keys() - results["feature_correlations"] - # TODO: Add actual tests + df = results["feature_correlations"] + assert len(df) == 2 # cat1 & contin1 and contin1 & cat1 + assert np.all(df["Spearman CC"] > 0.5) + + assert "standardized_mean_differences" in results.keys() + df = results["standardized_mean_differences"] + assert len(df) == 2 # Both groups of cat1 show a high SMD with contin1 + assert ( + df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Standardized Mean Difference"].values[0] + < -1 + ) + assert ( + df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 1)]["Standardized Mean Difference"].values[0] + > 1 + ) + + assert "categorical_value_counts" in results.keys() + df = results["categorical_value_counts"] + assert len(df) == 2 + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Group 1 Percentage"].values[0] == 0.2 + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Group 2 Percentage"].values[0] == 0.8 + + assert "feature_importances" in results.keys() + df = results["feature_importances"] + assert len(df) == 1 # contin1 predicts cat1 From a8633061618c841da96fdb810f23d50cabe384aa Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 25 Apr 2024 20:03:28 +0200 Subject: [PATCH 13/29] SMD improvements --- ehrapy/preprocessing/_bias.py | 38 ++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index f24f385b..bedd0350 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -52,6 +52,12 @@ def detect_bias( - "standardized_mean_differences": Standardized mean differences between groups of sensitive features that exceed the SMD threshold. - "feature_importances": Feature importances for predicting one feature with another that exceed the feature importance and prediction confidence thresholds. 
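The `feature_correlations` result exercised by these tests boils down to a thresholded pairwise correlation matrix. A rough stand-alone sketch of that screen with plain pandas and made-up column names (not part of the patch):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "age": [34, 45, 51, 62, 70, 78],
        "creatinine": [0.8, 0.9, 1.1, 1.3, 1.6, 1.9],
        "heart_rate": [88, 72, 95, 60, 75, 82],
    }
)

corr_threshold = 0.5
corr = df.corr(method="spearman")  # pairwise Spearman correlation matrix

# Keep only feature pairs whose absolute correlation exceeds the threshold.
flagged = [
    (f1, f2, corr.loc[f1, f2])
    for i, f1 in enumerate(corr.columns)
    for f2 in corr.columns[i + 1 :]
    if abs(corr.loc[f1, f2]) > corr_threshold
]
print(flagged)  # here only ('age', 'creatinine', 1.0) survives the cut
```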
+ + Examples: + >>> import ehrapy as ep + >>> adata = ep.dt.mimic_2(encoded=True) + >>> ep.ad.infer_feature_types(adata, output=None) + >>> results_dict = ep.pp.detect_bias(adata, "all") """ from ehrapy.tools import rank_features_supervised @@ -73,7 +79,6 @@ def detect_bias( ] adata_df = anndata_to_df(adata) - # categorical_var_names = adata.var_names[adata.var["feature_type"] == "categorical"] # -------------------- # Feature correlations @@ -93,7 +98,9 @@ def detect_bias( corr_results["Feature 1"].append(sens_feature) corr_results["Feature 2"].append(comp_feature) corr_results[f"{corr_method.capitalize()} CC"].append(correlations.loc[sens_feature, comp_feature]) - bias_results["feature_correlations"] = pd.DataFrame(corr_results) + bias_results["feature_correlations"] = pd.DataFrame(corr_results).sort_values( + by=f"{corr_method.capitalize()} CC", key=abs + ) # ----------------------------- # Standardized mean differences @@ -107,9 +114,11 @@ def detect_bias( continuous_var_names = adata.var_names[adata.var[FEATURE_TYPE_KEY] == CONTINUOUS_TAG] for sens_feature in cat_sens_features: sens_feature_groups = sorted(adata_df[sens_feature].unique()) - smd_nparray = np.zeros((len(sens_feature_groups), len(continuous_var_names))) + if len(sens_feature_groups) == 1: + continue + smd_df = pd.DataFrame(index=continuous_var_names, columns=sens_feature_groups) - for group_nr, group in enumerate(sens_feature_groups): + for _group_nr, group in enumerate(sens_feature_groups): # Compute SMD for all continuous features between the sensitive group and all other observations group_mean = adata_df[continuous_var_names][adata_df[sens_feature] == group].mean() group_std = adata_df[continuous_var_names][adata_df[sens_feature] == group].std() @@ -118,25 +127,20 @@ def detect_bias( comparison_std = adata_df[continuous_var_names][adata_df[sens_feature] != group].std() smd = (group_mean - comparison_mean) / np.sqrt((group_std**2 + comparison_std**2) / 2) - smd_nparray[group_nr] = smd + smd_df[group] = smd abs_smd = smd.abs() - for comp_feature_nr, comp_feature in enumerate( - continuous_var_names - ): # TODO: Restrict to continuous features - # if sens_feature == comp_feature: - # continue + for comp_feature_nr, comp_feature in enumerate(continuous_var_names): if abs_smd[comp_feature_nr] > smd_threshold: smd_results["Sensitive Feature"].append(sens_feature) smd_results["Sensitive Group"].append(group) smd_results["Compared Feature"].append(comp_feature) smd_results["Standardized Mean Difference"].append(smd[comp_feature_nr]) + adata.uns[f"smd_{sens_feature}"] = smd_df - adata.uns[f"smd_{sens_feature}"] = ( - smd_nparray.T - ) # TODO: Double check; also, this is not very informative without row names... 
- - bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results) + bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results).sort_values( + by="Standardized Mean Difference", key=abs + ) # ------------------------ # Categorical value counts @@ -159,9 +163,7 @@ def detect_bias( value_counts = value_counts.div(value_counts.sum(axis=1), axis=0) for sens_group in value_counts.index: - for comp_group1, comp_group2 in itertools.combinations( - value_counts.columns, 2 - ): # TODO: Try to find computationally more efficient way + for comp_group1, comp_group2 in itertools.combinations(value_counts.columns, 2): value_count_diff = ( value_counts.loc[sens_group, comp_group1] - value_counts.loc[sens_group, comp_group2] ) From eea9772444841943a3f0f96514052b32d02dac5f Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 10:39:22 +0200 Subject: [PATCH 14/29] Test fixes --- docs/usage/usage.md | 1 + tests/preprocessing/test_bias.py | 38 ++++++++------------------------ 2 files changed, 10 insertions(+), 29 deletions(-) diff --git a/docs/usage/usage.md b/docs/usage/usage.md index 8992f594..b0be91f8 100644 --- a/docs/usage/usage.md +++ b/docs/usage/usage.md @@ -94,6 +94,7 @@ Other than tools, preprocessing steps usually don’t return an easily interpret preprocessing.qc_metrics preprocessing.qc_lab_measurements preprocessing.mcar_test + preprocessing.detect_bias ``` ### Imputation diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py index 4bb610ee..98442131 100644 --- a/tests/preprocessing/test_bias.py +++ b/tests/preprocessing/test_bias.py @@ -40,26 +40,11 @@ def test_detect_bias_all_sens_features(adata): assert "standardized_mean_differences" in results.keys() df = results["standardized_mean_differences"] assert len(df) == 4 # Both groups of cat1, cat2 respectively show a high SMD with contin1 - assert ( - df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Standardized Mean Difference"].values[0] - < -1 - ) - assert ( - df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 1)]["Standardized Mean Difference"].values[0] - > 1 - ) - assert ( - df[(df["Sensitive Feature"] == "cat2") & (df["Sensitive Group"] == 10)]["Standardized Mean Difference"].values[ - 0 - ] - > 1 - ) - assert ( - df[(df["Sensitive Feature"] == "cat2") & (df["Sensitive Group"] == 11)]["Standardized Mean Difference"].values[ - 0 - ] - < -1 - ) + smd_key = "Standardized Mean Difference" + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)][smd_key].values[0] < 0 + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 1)][smd_key].values[0] > 0 + assert df[(df["Sensitive Feature"] == "cat2") & (df["Sensitive Group"] == 10)][smd_key].values[0] > 0 + assert df[(df["Sensitive Feature"] == "cat2") & (df["Sensitive Group"] == 11)][smd_key].values[0] < 0 assert "categorical_value_counts" in results.keys() df = results["categorical_value_counts"] @@ -89,14 +74,9 @@ def test_detect_bias_specific_sens_features(adata): assert "standardized_mean_differences" in results.keys() df = results["standardized_mean_differences"] assert len(df) == 2 # Both groups of cat1 show a high SMD with contin1 - assert ( - df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)]["Standardized Mean Difference"].values[0] - < -1 - ) - assert ( - df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 1)]["Standardized Mean Difference"].values[0] - > 1 - ) + smd_key = "Standardized Mean Difference" + assert 
df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 0)][smd_key].values[0] < -1 + assert df[(df["Sensitive Feature"] == "cat1") & (df["Sensitive Group"] == 1)][smd_key].values[0] > 1 assert "categorical_value_counts" in results.keys() df = results["categorical_value_counts"] @@ -106,4 +86,4 @@ def test_detect_bias_specific_sens_features(adata): assert "feature_importances" in results.keys() df = results["feature_importances"] - assert len(df) == 1 # contin1 predicts cat1 + assert len(df) == 2 # contin1 predicts cat1 and cat1 predicts contin1 From 5347c356430341a776f73210c5710cce53811485 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 May 2024 08:43:16 +0000 Subject: [PATCH 15/29] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ehrapy/tools/feature_ranking/_feature_importances.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ehrapy/tools/feature_ranking/_feature_importances.py b/ehrapy/tools/feature_ranking/_feature_importances.py index 3cef310f..caa38c83 100644 --- a/ehrapy/tools/feature_ranking/_feature_importances.py +++ b/ehrapy/tools/feature_ranking/_feature_importances.py @@ -138,9 +138,9 @@ def rank_features_supervised( evaluation_metric = "R2 score" if prediction_type == "continuous" else "accuracy" if logging: - logger.info( - f"Training completed. The model achieved an {evaluation_metric} of {score:.2f} on the test set, consisting of {len(y_test)} samples." - ) + logger.info( + f"Training completed. The model achieved an {evaluation_metric} of {score:.2f} on the test set, consisting of {len(y_test)} samples." + ) if model == "regression" or model == "svm": feature_importances = pd.Series(predictor.coef_.squeeze(), index=input_data.columns) From f1f4b4dee5a3878f83468b7ecb71b0e18e9e5e75 Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 10:48:41 +0200 Subject: [PATCH 16/29] Save SMD in uns subdict --- ehrapy/preprocessing/_bias.py | 3 ++- ehrapy/tools/feature_ranking/_feature_importances.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index bedd0350..966816b9 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -111,6 +111,7 @@ def detect_bias( "Compared Feature": [], "Standardized Mean Difference": [], } + adata.uns["smd"] = {} continuous_var_names = adata.var_names[adata.var[FEATURE_TYPE_KEY] == CONTINUOUS_TAG] for sens_feature in cat_sens_features: sens_feature_groups = sorted(adata_df[sens_feature].unique()) @@ -136,7 +137,7 @@ def detect_bias( smd_results["Sensitive Group"].append(group) smd_results["Compared Feature"].append(comp_feature) smd_results["Standardized Mean Difference"].append(smd[comp_feature_nr]) - adata.uns[f"smd_{sens_feature}"] = smd_df + adata.uns["smd"][sens_feature] = smd_df bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results).sort_values( by="Standardized Mean Difference", key=abs diff --git a/ehrapy/tools/feature_ranking/_feature_importances.py b/ehrapy/tools/feature_ranking/_feature_importances.py index caa38c83..3c845d48 100644 --- a/ehrapy/tools/feature_ranking/_feature_importances.py +++ b/ehrapy/tools/feature_ranking/_feature_importances.py @@ -19,6 +19,7 @@ def rank_features_supervised( adata: AnnData, predicted_feature: str, + *, model: Literal["regression", "svm", "rf"] = "rf", input_features: Iterable[str] | Literal["all"] = "all", layer: 
str | None = None, From 6688bf6341149805b4b6193b10d87264a2ae2a99 Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 14:30:13 +0200 Subject: [PATCH 17/29] Fix tests and silence test warnings --- ehrapy/preprocessing/_bias.py | 6 +++--- tests/tools/feature_ranking/test_feature_importances.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 966816b9..e38160f2 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -131,12 +131,12 @@ def detect_bias( smd_df[group] = smd abs_smd = smd.abs() - for comp_feature_nr, comp_feature in enumerate(continuous_var_names): - if abs_smd[comp_feature_nr] > smd_threshold: + for comp_feature in continuous_var_names: + if abs_smd[comp_feature] > smd_threshold: smd_results["Sensitive Feature"].append(sens_feature) smd_results["Sensitive Group"].append(group) smd_results["Compared Feature"].append(comp_feature) - smd_results["Standardized Mean Difference"].append(smd[comp_feature_nr]) + smd_results["Standardized Mean Difference"].append(smd[comp_feature]) adata.uns["smd"][sens_feature] = smd_df bias_results["standardized_mean_differences"] = pd.DataFrame(smd_results).sort_values( diff --git a/tests/tools/feature_ranking/test_feature_importances.py b/tests/tools/feature_ranking/test_feature_importances.py index 244807c0..285ce091 100644 --- a/tests/tools/feature_ranking/test_feature_importances.py +++ b/tests/tools/feature_ranking/test_feature_importances.py @@ -16,7 +16,7 @@ def test_continuous_prediction(): adata.var[FEATURE_TYPE_KEY] = [CONTINUOUS_TAG] * 3 for model in ["regression", "svm", "rf"]: - rank_features_supervised(adata, "target", model, "all") + rank_features_supervised(adata, "target", model=model, input_features="all") assert "feature_importances" in adata.var assert adata.var["feature_importances"]["feature1"] > 0 assert adata.var["feature_importances"]["feature2"] == 0 @@ -32,7 +32,7 @@ def test_categorical_prediction(): adata.var[FEATURE_TYPE_KEY] = [CATEGORICAL_TAG] * 3 for model in ["regression", "svm", "rf"]: - rank_features_supervised(adata, "target", model, "all") + rank_features_supervised(adata, "target", model=model, input_features="all") assert "feature_importances" in adata.var assert adata.var["feature_importances"]["feature1"] > 0 assert adata.var["feature_importances"]["feature2"] == 0 @@ -47,7 +47,7 @@ def test_multiclass_prediction(): adata.var_names = ["target", "feature1", "feature2"] adata.var[FEATURE_TYPE_KEY] = [CATEGORICAL_TAG] * 3 - rank_features_supervised(adata, "target", "rf", "all") + rank_features_supervised(adata, "target", model="rf", input_features="all") assert "feature_importances" in adata.var assert adata.var["feature_importances"]["feature1"] > 0 assert adata.var["feature_importances"]["feature2"] == 0 @@ -55,5 +55,5 @@ def test_multiclass_prediction(): for invalid_model in ["regression", "svm"]: with pytest.raises(ValueError) as excinfo: - rank_features_supervised(adata, "target", invalid_model, "all") + rank_features_supervised(adata, "target", model=invalid_model, input_features="all") assert str(excinfo.value).startswith("Feature target has more than two categories.") From 381b8b1c0c004724003aaccb1f1e866b80e413f9 Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 14:52:23 +0200 Subject: [PATCH 18/29] Introduced copy parameter --- ehrapy/preprocessing/_bias.py | 12 +++++++++++- tests/preprocessing/test_bias.py | 14 ++++++++++---- 2 files changed, 21 insertions(+), 5 
deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index e38160f2..0b0c01a6 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -22,7 +22,8 @@ def detect_bias( feature_importance_threshold: float = 0.1, prediction_confidence_threshold: float = 0.5, corr_method: Literal["pearson", "spearman"] = "spearman", -) -> dict[str, pd.DataFrame]: + copy: bool = False, +) -> dict[str, pd.DataFrame] | tuple[dict[str, pd.DataFrame], AnnData]: """Detects biases in the data using feature correlations, standardized mean differences, and feature importances. Detects biases with respect to sensitive features, which can be either a specified subset of features or all features in adata.var. @@ -45,6 +46,8 @@ def detect_bias( prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another feature to be considered of interest. Defaults to 0.5. corr_method: The correlation method to use. Choose between "pearson" and "spearman". Defaults to "spearman". + copy: If set to False, adata is updated in place. If set to True, the adata is copied and the results are stored in the copied adata, which + is then returned. Defaults to False. Returns: A dictionary containing the results of the bias detection. The keys are: @@ -53,6 +56,8 @@ def detect_bias( - "feature_importances": Feature importances for predicting one feature with another that exceed the feature importance and prediction confidence thresholds. + If copy is set to True, the function returns a tuple with the results dictionary and the updated adata. + Examples: >>> import ehrapy as ep >>> adata = ep.dt.mimic_2(encoded=True) @@ -78,6 +83,9 @@ def detect_bias( feat for feat in sensitive_features if adata.var[FEATURE_TYPE_KEY][feat] == CATEGORICAL_TAG ] + if copy: + adata = adata.copy() + adata_df = anndata_to_df(adata) # -------------------- @@ -217,4 +225,6 @@ def detect_bias( feature_importances_results["Prediction Score"].append(prediction_score) bias_results["feature_importances"] = pd.DataFrame(feature_importances_results) + if copy: + return bias_results, adata return bias_results diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py index 98442131..9052cbfe 100644 --- a/tests/preprocessing/test_bias.py +++ b/tests/preprocessing/test_bias.py @@ -24,7 +24,7 @@ def adata(): return adata -def test_detect_bias_all_sens_features(adata): +def test_detect_bias_all_sensitive_features(adata): results = ep.pp.detect_bias( adata, "all", run_feature_importances=True, corr_method="spearman", feature_importance_threshold=0.4 ) @@ -57,15 +57,20 @@ def test_detect_bias_all_sens_features(adata): assert len(df) >= 7 # 6 for the pairwise correlating features and one/two for contin1, which predicts cat1 -def test_detect_bias_specific_sens_features(adata): - results = ep.pp.detect_bias( +def test_detect_bias_specified_sensitive_features(adata): + results, result_adata = ep.pp.detect_bias( adata, ["contin1", "cat1"], run_feature_importances=True, corr_method="spearman", - feature_importance_threshold=0.4, + feature_importance_threshold=0.5, + prediction_confidence_threshold=0.4, + copy=True, ) + assert "smd" not in adata.uns.keys() + assert "smd" in result_adata.uns.keys() + assert "feature_correlations" in results.keys() df = results["feature_correlations"] assert len(df) == 2 # cat1 & contin1 and contin1 & cat1 @@ -86,4 +91,5 @@ def test_detect_bias_specific_sens_features(adata): assert "feature_importances" 
in results.keys() df = results["feature_importances"] + print(df) assert len(df) == 2 # contin1 predicts cat1 and cat1 predicts contin1 From 3ff2c65280d0eb5f7540443ad77b6e7b1de782e6 Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 15:28:34 +0200 Subject: [PATCH 19/29] Added encoding check --- ehrapy/preprocessing/_bias.py | 6 ++++++ tests/preprocessing/test_bias.py | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 0b0c01a6..65e15948 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -88,6 +88,12 @@ def detect_bias( adata_df = anndata_to_df(adata) + for feature in adata.var_names: + if not np.all(adata_df[feature].dropna().apply(type).isin([int, float, complex])): + raise ValueError( + f"Feature {feature} is not encoded numerically. Please encode the data (ep.pp.encode) before running bias detection." + ) + # -------------------- # Feature correlations # -------------------- diff --git a/tests/preprocessing/test_bias.py b/tests/preprocessing/test_bias.py index 9052cbfe..4451f95d 100644 --- a/tests/preprocessing/test_bias.py +++ b/tests/preprocessing/test_bias.py @@ -93,3 +93,13 @@ def test_detect_bias_specified_sensitive_features(adata): df = results["feature_importances"] print(df) assert len(df) == 2 # contin1 predicts cat1 and cat1 predicts contin1 + + +def test_unencoded_data(): + adata = ep.ad.df_to_anndata( + pd.DataFrame({"Unencoded": ["A", "B", "C", "D", "E", "F"], "Encoded": [1, 2, 3, 4, 5, 6]}) + ) + adata.var[FEATURE_TYPE_KEY] = [CATEGORICAL_TAG] * 2 + + with pytest.raises(ValueError): + ep.pp.detect_bias(adata, "all") From bcfe3a437aa7d91ee16fb8d3181e81f9582667c2 Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 15:34:55 +0200 Subject: [PATCH 20/29] Fixed sensitive_features dtype --- ehrapy/preprocessing/_bias.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index 65e15948..121b9853 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -13,7 +13,7 @@ @check_feature_types def detect_bias( adata: AnnData, - sensitive_features: Iterable[str] | np.ndarray | Literal["all"], + sensitive_features: Iterable[str] | Literal["all"], *, run_feature_importances: bool | None = None, corr_threshold: float = 0.5, From b11f5ea553381e1276c8c3d1da49b12b200aaad0 Mon Sep 17 00:00:00 2001 From: Lilly Date: Wed, 1 May 2024 15:37:49 +0200 Subject: [PATCH 21/29] Feature importances return docstring --- ehrapy/tools/feature_ranking/_feature_importances.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ehrapy/tools/feature_ranking/_feature_importances.py b/ehrapy/tools/feature_ranking/_feature_importances.py index 3c845d48..a2af2c61 100644 --- a/ehrapy/tools/feature_ranking/_feature_importances.py +++ b/ehrapy/tools/feature_ranking/_feature_importances.py @@ -54,6 +54,9 @@ def rank_features_supervised( return_score: Set to True to return the R2 score / the accuracy of the model. Defaults to False. **kwargs: Additional keyword arguments to pass to the model. See the documentation of the respective model in scikit-learn for details. + Returns: + If return_score is True, the R2 score / accuracy of the model on the test set. Otherwise, None. 
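The encoding check introduced in PATCH 19 above rejects any feature whose values are not plain numbers. A stand-alone approximation of that check on a pandas DataFrame (toy data; `dtype=object` is assumed here so that the values stay Python ints rather than NumPy scalars):

```python
import pandas as pd

# Toy frame: one string-encoded column and one numerically encoded column.
df = pd.DataFrame({"Unencoded": ["A", "B", "C"], "Encoded": [1, 2, 3]}, dtype=object)

for column in df.columns:
    # Every non-missing value must be a plain int/float/complex, otherwise the feature is rejected.
    is_numeric = df[column].dropna().apply(type).isin([int, float, complex])
    if not is_numeric.all():
        print(f"{column!r} is not encoded numerically and would trigger the ValueError in detect_bias.")
```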
+
+
     Examples:
         >>> import ehrapy as ep
         >>> adata = ep.dt.mimic_2(encoded=False)

From e1aaaaea56fc69641941cedfd60ff1b99314400b Mon Sep 17 00:00:00 2001
From: Lilly
Date: Thu, 2 May 2024 12:03:49 +0200
Subject: [PATCH 22/29] Improved docs explanations

---
 ehrapy/preprocessing/_bias.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index 121b9853..3de32aa0 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -27,9 +27,13 @@ def detect_bias(
     """Detects biases in the data using feature correlations, standardized mean differences, and feature importances.
 
     Detects biases with respect to sensitive features, which can be either a specified subset of features or all features in adata.var.
-    The method computes pairwise correlations between features, standardized mean differences between groups of sensitive features, and
-    feature importances for predicting one feature with another. The results are stored in adata.varp and adata.varm.
-    Values that exceed the specified thresholds are considered of interest and returned in the results.
+    The method detects biases by computing:
+    - pairwise correlations between features
+    - standardized mean differences for numeric features between groups of sensitive features
+    - value counts of categorical features between groups of sensitive features
+    - feature importances for predicting one feature with another.
+    Results of the computations are stored in var, varp, and uns of the adata object.
+    Values that exceed the specified thresholds are considered of interest and returned in the results dictionary.
 
     Args:
         adata: An annotated data matrix containing EHR data.
@@ -57,6 +57,8 @@ def detect_bias(
         A dictionary containing the results of the bias detection. The keys are:
         - "feature_correlations": Pairwise correlations between features that exceed the correlation threshold.
         - "standardized_mean_differences": Standardized mean differences between groups of sensitive features that exceed the SMD threshold.
+        - "categorical_value_counts": Value counts of categorical features between groups of sensitive features that exceed the categorical factor
+            threshold.
         - "feature_importances": Feature importances for predicting one feature with another that exceed the feature importance and prediction
             confidence thresholds.
 
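The `categorical_value_counts` entry documented above compares, per sensitive group, how the categories of another feature are distributed. A loose sketch of that idea with plain pandas (toy columns; the exact flagging rule in `detect_bias` is only partly visible in this patch series, so the cut-off below is a hypothetical stand-in):

```python
import itertools

import pandas as pd

df = pd.DataFrame(
    {
        "sex": ["F"] * 5 + ["M"] * 5,
        "treatment": ["A", "A", "A", "A", "B", "B", "B", "B", "A", "B"],
    }
)

# Per sensitive group, the share of observations falling into each category of the compared feature.
counts = df.groupby(["sex", "treatment"]).size().unstack(fill_value=0)
percentages = counts.div(counts.sum(axis=1), axis=0)

for sens_group in percentages.index:
    for group1, group2 in itertools.combinations(percentages.columns, 2):
        diff = abs(percentages.loc[sens_group, group1] - percentages.loc[sens_group, group2])
        if diff > 0.5:  # hypothetical cut-off; detect_bias applies its own threshold
            print(
                f"{sens_group}: {group1}={percentages.loc[sens_group, group1]:.0%}, "
                f"{group2}={percentages.loc[sens_group, group2]:.0%}"
            )
```

With this toy data both sensitive groups are flagged, since treatment "A" makes up 80% of one group but only 20% of the other.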
From c1d3916266bbdbde25851a2d7f3be798b9a75f1f Mon Sep 17 00:00:00 2001
From: Lilly
Date: Thu, 2 May 2024 12:10:09 +0200
Subject: [PATCH 23/29] Sort feature importances results

---
 ehrapy/preprocessing/_bias.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index 3de32aa0..c45107a6 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -235,7 +235,9 @@ def detect_bias(
                     feature_importances_results["Predicted Feature"].append(prediction_feature)
                     feature_importances_results["Feature Importance"].append(feature_importance)
                     feature_importances_results["Prediction Score"].append(prediction_score)
-        bias_results["feature_importances"] = pd.DataFrame(feature_importances_results)
+        bias_results["feature_importances"] = pd.DataFrame(feature_importances_results).sort_values(
+            by="Feature Importance", key=abs
+        )

From f2d11f82cd8d1a7f5504e9a9727da49c0c910492 Mon Sep 17 00:00:00 2001
From: Lilly May <93096564+Lilly-May@users.noreply.github.com>
Date: Thu, 2 May 2024 16:03:39 +0200
Subject: [PATCH 24/29] Apply suggestions from code review

Co-authored-by: Lukas Heumos
---
 ehrapy/preprocessing/_bias.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index c45107a6..e401e724 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -38,7 +38,6 @@ def detect_bias(
     Args:
         adata: An annotated data matrix containing EHR data.
         sensitive_features: Sensitive features to consider for bias detection. If set to "all", all features in adata.var will be considered.
-            If only a subset of features should be considered, provide as an iterable.
         run_feature_importances: Whether to run feature importances for detecting bias. If set to None, the function will run feature importances
             if sensitive_features is not set to "all", as this can be computationally expensive. Defaults to None.
         corr_threshold: The threshold for the correlation coefficient between two features to be considered of interest. Defaults to 0.5.
@@ -49,7 +48,7 @@ def detect_bias(
             of interest. Defaults to 0.1.
         prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another
             feature to be considered of interest. Defaults to 0.5.
-        corr_method: The correlation method to use. Choose between "pearson" and "spearman". Defaults to "spearman".
+        corr_method: The correlation method to use. Defaults to "spearman".
         copy: If set to False, adata is updated in place. If set to True, the adata is copied and the results are stored in the copied adata, which
             is then returned. Defaults to False.
 
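The `key=abs` pattern used in PATCH 23 (and for the correlation table in PATCH 22) ranks result rows by magnitude while keeping the sign visible. A tiny stand-alone illustration with made-up values:

```python
import pandas as pd

results = pd.DataFrame(
    {
        "Predicted Feature": ["a", "b", "c"],
        "Feature Importance": [0.10, -0.80, 0.35],
    }
)

# Sorting on the raw column would place -0.80 first; `key=abs` orders by magnitude instead,
# so the output runs a (0.10), c (0.35), b (-0.80).
print(results.sort_values(by="Feature Importance", key=abs))
```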
From 2e8d630183e6744956d3474224b138e76b6b1c7b Mon Sep 17 00:00:00 2001 From: Lilly Date: Thu, 2 May 2024 16:22:46 +0200 Subject: [PATCH 25/29] Review comments --- ehrapy/preprocessing/_bias.py | 10 +++++++--- ehrapy/tools/feature_ranking/_feature_importances.py | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index e401e724..beaa22f9 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -22,16 +22,19 @@ def detect_bias( feature_importance_threshold: float = 0.1, prediction_confidence_threshold: float = 0.5, corr_method: Literal["pearson", "spearman"] = "spearman", + layer: str | None = None, copy: bool = False, ) -> dict[str, pd.DataFrame] | tuple[dict[str, pd.DataFrame], AnnData]: """Detects biases in the data using feature correlations, standardized mean differences, and feature importances. Detects biases with respect to sensitive features, which can be either a specified subset of features or all features in adata.var. The method detects biases by computing: + - pairwise correlations between features - standardized mean differences for numeric features between groups of sensitive features - value counts of categorical features between groups of sensitive features - - feature importances for predicting one feature with another. + - feature importances for predicting one feature with another + Results of the computations are stored in var, varp, and uns of the adata object. Values that exceed the specified thresholds are considered of interest and returned in the results dictionary. @@ -49,6 +52,7 @@ def detect_bias( prediction_confidence_threshold: The threshold for the prediction confidence (R2 or accuracy) of a sensitive feature for predicting another feature to be considered of interest. Defaults to 0.5. corr_method: The correlation method to use. Defaults to "spearman". + layer: The layer in adata.layers to use for computation. If None, adata.X will be used. Defaults to None. copy: If set to False, adata is updated in place. If set to True, the adata is copied and the results are stored in the copied adata, which is then returned. Defaults to False. @@ -91,7 +95,7 @@ def detect_bias( if copy: adata = adata.copy() - adata_df = anndata_to_df(adata) + adata_df = anndata_to_df(adata, layer=layer) for feature in adata.var_names: if not np.all(adata_df[feature].dropna().apply(type).isin([int, float, complex])): @@ -218,7 +222,7 @@ def detect_bias( model="rf", key_added=f"{prediction_feature}_feature_importances", percent_output=True, - logging=False, + verbose=False, return_score=True, ) diff --git a/ehrapy/tools/feature_ranking/_feature_importances.py b/ehrapy/tools/feature_ranking/_feature_importances.py index a2af2c61..5183bb24 100644 --- a/ehrapy/tools/feature_ranking/_feature_importances.py +++ b/ehrapy/tools/feature_ranking/_feature_importances.py @@ -27,7 +27,7 @@ def rank_features_supervised( key_added: str = "feature_importances", feature_scaling: Literal["standard", "minmax"] | None = "standard", percent_output: bool = False, - logging: bool = True, + verbose: bool = True, return_score: bool = False, **kwargs, ) -> float | None: @@ -50,7 +50,7 @@ def rank_features_supervised( for each feature individually. Defaults to 'standard'. percent_output: Set to True to output the feature importances as percentages. Note that information about positive or negative coefficients for regression models will be lost. Defaults to False. - logging: Set to False to disable logging. 
Defaults to True. + verbose: Set to False to disable logging. Defaults to True. return_score: Set to True to return the R2 score / the accuracy of the model. Defaults to False. **kwargs: Additional keyword arguments to pass to the model. See the documentation of the respective model in scikit-learn for details. @@ -141,7 +141,7 @@ def rank_features_supervised( score = predictor.score(x_test, y_test) evaluation_metric = "R2 score" if prediction_type == "continuous" else "accuracy" - if logging: + if verbose: logger.info( f"Training completed. The model achieved an {evaluation_metric} of {score:.2f} on the test set, consisting of {len(y_test)} samples." ) From 9d8b74e1caaa7a4da831565bab60eb7d2274c7e0 Mon Sep 17 00:00:00 2001 From: eroell Date: Fri, 3 May 2024 18:44:37 +0200 Subject: [PATCH 26/29] doc formating --- ehrapy/preprocessing/_bias.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py index beaa22f9..6d22bbb8 100644 --- a/ehrapy/preprocessing/_bias.py +++ b/ehrapy/preprocessing/_bias.py @@ -57,13 +57,14 @@ def detect_bias( is then returned. Defaults to False. Returns: - A dictionary containing the results of the bias detection. The keys are: + A dictionary containing the results of the bias detection. The keys are + - "feature_correlations": Pairwise correlations between features that exceed the correlation threshold. - "standardized_mean_differences": Standardized mean differences between groups of sensitive features that exceed the SMD threshold. - "categorical_value_counts": Value counts of categorical features between groups of sensitive features that exceed the categorical factor - threshold. + threshold. - "feature_importances": Feature importances for predicting one feature with another that exceed the feature importance and prediction - confidence thresholds. + confidence thresholds. If copy is set to True, the function returns a tuple with the results dictionary and the updated adata. 
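With the keyword-only signature and the `logging` to `verbose` rename from the patches above, a call to the feature-ranking helper might look roughly like this. The MIMIC-II demo dataset and the `tco2_first` target column are illustrative assumptions rather than something the patches prescribe:

```python
import ehrapy as ep
from ehrapy.tools import rank_features_supervised

adata = ep.dt.mimic_2(encoded=True)  # any numerically encoded AnnData should work here
ep.ad.infer_feature_types(adata, output=None)

# `model` and the remaining options must now be passed as keywords because of the `*` in the signature.
score = rank_features_supervised(
    adata,
    "tco2_first",  # assumed target column; replace with a feature of interest
    model="rf",
    input_features="all",
    percent_output=True,
    verbose=False,
    return_score=True,
)
print(f"Model score on the held-out split: {score:.2f}")
print(adata.var["feature_importances"].sort_values(ascending=False).head())
```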
From daef6064862ea2193cf54995360ffbf68d6c293d Mon Sep 17 00:00:00 2001
From: Lilly May <93096564+Lilly-May@users.noreply.github.com>
Date: Sat, 4 May 2024 11:43:17 +0200
Subject: [PATCH 27/29] Apply suggestions from code review

Co-authored-by: Eljas Roellin <65244425+eroell@users.noreply.github.com>
---
 ehrapy/preprocessing/_bias.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index 6d22bbb8..40e98f63 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -216,7 +216,8 @@ def detect_bias(
             "Prediction Score": [],
         }
         for prediction_feature in adata.var_names:
-            prediction_score = rank_features_supervised(
+            try:
+                prediction_score = rank_features_supervised(
                 adata,
                 prediction_feature,
                 input_features="all",
@@ -226,7 +227,11 @@ def detect_bias(
                 verbose=False,
                 return_score=True,
             )
-
+            except ValueError as e:
+                if "Input y contains NaN" in str(e):
+                    raise ValueError(f"During feature importance computation, input feature y ({prediction_feature}) was found to contain NaNs.")
+                else: raise e
+
             for sens_feature in sens_features_list:
                 if prediction_feature == sens_feature:
                     continue

From c22ee85ae3161d773a571a9ad3934955a27792ee Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 4 May 2024 09:43:34 +0000
Subject: [PATCH 28/29] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ehrapy/preprocessing/_bias.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index 40e98f63..2e30b849 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -218,20 +218,23 @@ def detect_bias(
         for prediction_feature in adata.var_names:
             try:
                 prediction_score = rank_features_supervised(
-                adata,
-                prediction_feature,
-                input_features="all",
-                model="rf",
-                key_added=f"{prediction_feature}_feature_importances",
-                percent_output=True,
-                verbose=False,
-                return_score=True,
-            )
+                    adata,
+                    prediction_feature,
+                    input_features="all",
+                    model="rf",
+                    key_added=f"{prediction_feature}_feature_importances",
+                    percent_output=True,
+                    verbose=False,
+                    return_score=True,
+                )
             except ValueError as e:
                 if "Input y contains NaN" in str(e):
-                    raise ValueError(f"During feature importance computation, input feature y ({prediction_feature}) was found to contain NaNs.")
-                else: raise e
-
+                    raise ValueError(
+                        f"During feature importance computation, input feature y ({prediction_feature}) was found to contain NaNs."
+                    )
+                else:
+                    raise e
+
             for sens_feature in sens_features_list:
                 if prediction_feature == sens_feature:
                     continue

From 5ec7f8a0a6bc102edbd79ebb17f332b2f6240aa6 Mon Sep 17 00:00:00 2001
From: Lilly
Date: Sat, 4 May 2024 11:50:34 +0200
Subject: [PATCH 29/29] Fixed error raising

---
 ehrapy/preprocessing/_bias.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ehrapy/preprocessing/_bias.py b/ehrapy/preprocessing/_bias.py
index 2e30b849..577308b5 100644
--- a/ehrapy/preprocessing/_bias.py
+++ b/ehrapy/preprocessing/_bias.py
@@ -231,7 +231,7 @@ def detect_bias(
                 if "Input y contains NaN" in str(e):
                     raise ValueError(
                         f"During feature importance computation, input feature y ({prediction_feature}) was found to contain NaNs."
-                    )
+                    ) from e
                 else:
                     raise e
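The final fix above switches to `raise ... from e`, which chains the new exception to the original one instead of discarding it. A small generic Python sketch (not tied to ehrapy) of what that buys:

```python
def parse_positive(value: str) -> float:
    try:
        number = float(value)
    except ValueError as e:
        # `from e` stores the original exception as __cause__, so the full context
        # survives in the traceback, mirroring the fix in PATCH 29 above.
        raise ValueError(f"Could not interpret {value!r} as a number.") from e
    if number <= 0:
        raise ValueError("Expected a positive number.")
    return number


try:
    parse_positive("abc")
except ValueError as err:
    print(err, "| caused by:", repr(err.__cause__))
```

Without the `from e`, the underlying `float` conversion error would only appear as an implicit "during handling of the above exception" note rather than as the declared cause.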