From 9f7787cb488386336af988b63a9e7a76fc0aaa07 Mon Sep 17 00:00:00 2001
From: Robert Petryszak <info@datasome.co.uk>
Date: Wed, 18 Oct 2023 17:26:13 +0100
Subject: [PATCH] Break up the single method in cellsign.py into smaller
 methods, following linter suggestion

---
 cellphonedb/src/core/utils/cellsign.py | 113 +++++++++++++++++--------
 1 file changed, 76 insertions(+), 37 deletions(-)

diff --git a/cellphonedb/src/core/utils/cellsign.py b/cellphonedb/src/core/utils/cellsign.py
index b6eb4d0..76defba 100644
--- a/cellphonedb/src/core/utils/cellsign.py
+++ b/cellphonedb/src/core/utils/cellsign.py
@@ -3,6 +3,68 @@
 from typing import Tuple
 
 
+def get_active_cts_tf_for_interaction(
+        partner: str, col_name: str, id_cp_interaction: str, receptor2tfs: dict,
+        active_tf2cell_types: dict, active_interactions: pd.DataFrame,
+        separator: str = '|') -> tuple:
+    """Return (active cell types, tf) for the first active TF of partner in col_name, else (None, None).
+
+    separator splits col_name into its two cell type names. It is accepted as a trailing parameter
+    because the body previously read it as an undefined name; the '|' default is an assumption about
+    the package-wide separator - callers should pass the real one explicitly.
+    A TF qualifies only if it is active in one of the two cell types and active_interactions holds 1
+    for (id_cp_interaction, col_name).
+    """
+    ct_pair = set(col_name.split(separator))
+    for tf in receptor2tfs.get(partner, ()):
+        # Cell types of this pair in which tf is active (empty when tf is not active anywhere)
+        active_cts_for_interaction = list(set(active_tf2cell_types.get(tf, ())) & ct_pair)
+        # Report only if the partner is active and the interaction is relevant
+        if any(active_cts_for_interaction) and active_interactions.at[id_cp_interaction, col_name] == 1:
+            return active_cts_for_interaction, tf
+    return None, None
+
+
+def collect_active_interactions_across_cell_types(
+        ct_pair_cols: list, significant_relevant_df: pd.DataFrame, receptor2tfs: dict,
+        active_tf2cell_types: dict, separator: str, active_interactions_deconvoluted: list,
+        active_interactions: pd.DataFrame) -> Tuple[pd.DataFrame, list]:
+    """Record active interactions per cell type pair column and zero out the inactive ones.
+
+    For every non-NaN row of each column in ct_pair_cols, the first partner (gene_a, then gene_b) that
+    has an active TF appends one row per active cell type to active_interactions_deconvoluted; when
+    neither partner qualifies, the interaction's cell in active_interactions is set to 0.
+    Both active_interactions and active_interactions_deconvoluted are mutated in place and returned.
+    """
+    for col_name in ct_pair_cols:
+        cols = ['id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b', 'gene_a', 'gene_b', col_name]
+        for row in significant_relevant_df[~significant_relevant_df[col_name].isna()][cols].values:
+            id_cp_interaction = row[0]
+            interacting_pair = row[1]
+            interaction_active = False
+            partner_pair = [row[4], row[5]]
+            for idx, partner in enumerate(partner_pair):
+                if not partner:
+                    # partner is a complex - pick its id from partner_a/b position
+                    partner = row[idx + 2].split(":")[1]
+
+                active_cts_for_interaction, tf = get_active_cts_tf_for_interaction(
+                    partner, col_name, id_cp_interaction, receptor2tfs,
+                    active_tf2cell_types, active_interactions, separator)
+                interaction_active = active_cts_for_interaction is not None
+                if interaction_active:
+                    for ct in active_cts_for_interaction:
+                        active_interactions_deconvoluted.append(
+                            [id_cp_interaction, interacting_pair, row[2], row[3], row[4], row[5], tf,
+                             col_name, ct])
+                    break
+            if not interaction_active:
+                # active_interactions already contains 1 for relevant interactions and 0 for non-relevant, hence
+                # the operation below is a no-op if the interaction was already irrelevant
+                active_interactions.at[id_cp_interaction, col_name] = 0
+    return active_interactions, active_interactions_deconvoluted
+
+
 def find_active_interactions(
         significant_relevant_df: pd.DataFrame,
         receptor2tfs: dict,
@@ -20,44 +82,21 @@ def find_active_interactions(
     # If active_interactions was copied from significant_means (in the case of statistical analysis), we want to make it
     # look the same as relevant_interactions (from DEG analysis). This is so that active_interactions can be handled in
     # the same way to find out if an interaction is relevant/significant before recording it as active:
-    # N.B. assumption: It's never the case in statistical analysis that all means are significant
-    is_significant_means = active_interactions[ct_pair_cols].apply(lambda x: np.isnan(x)).values.any()
-    if is_significant_means:
-        # Replace all non-nan values with 1, then all nan values with 0
-        for col in ct_pair_cols:
-            active_interactions[col] = active_interactions[col].apply(lambda x: 1 if not np.isnan(x) else 0)
-            active_interactions.astype({col: 'int'})
+    # Replace all non-nan values with 1, then all nan values with 0
+    for col in ct_pair_cols:
+        active_interactions[col] = active_interactions[col].apply(lambda x: 1 if not np.isnan(x) else 0)
+        active_interactions.astype({col: 'int'})
+
+    active_interactions, active_interactions_deconvoluted = \
+        collect_active_interactions_across_cell_types(
+            ct_pair_cols,
+            significant_relevant_df,
+            receptor2tfs,
+            active_tf2cell_types,
+            separator,
+            active_interactions_deconvoluted,
+            active_interactions)
 
-    for col_name in ct_pair_cols:
-        ct_pair = col_name.split(separator)
-        cols = ['id_cp_interaction', 'interacting_pair', 'partner_a', 'partner_b', 'gene_a', 'gene_b', col_name]
-        for row in significant_relevant_df[~significant_relevant_df[col_name].isna()][cols].values:
-            id_cp_interaction = row[0]
-            interacting_pair = row[1]
-            interaction_active = False
-            partner_pair = [row[4], row[5]]
-            for idx, partner in enumerate(partner_pair):
-                if not partner:
-                    # partner is a complex - pick its id from partner_a/b position
-                    partner = row[idx + 2].split(":")[1]
-                if partner in receptor2tfs:
-                    for tf in receptor2tfs[partner]:
-                        if tf in active_tf2cell_types:
-                            cts = active_tf2cell_types[tf]
-                            active_cts_for_interaction = list(set(cts) & set(ct_pair))
-                            interaction_active = any(active_cts_for_interaction)
-                            if interaction_active and active_interactions.at[id_cp_interaction, col_name] == 1:
-                                # Add interaction to active_interactions_deconvoluted only if it's active and relevant
-                                for ct in active_cts_for_interaction:
-                                    active_interactions_deconvoluted.append(
-                                        [id_cp_interaction, interacting_pair, row[2], row[3], row[4], row[5], tf,
-                                         col_name, ct])
-                if interaction_active:
-                    break
-            if not interaction_active:
-                # active_interactions already contains 1 for relevant interactions and 0 for non-relevant, hence
-                # either the operation below is a no-op if the interactions was already irrelevant
-                active_interactions.at[id_cp_interaction, col_name] = 0
     # Remove from active_interactions all interactions with 0 across all ct_pair_cols (such interaction are not active)
     active_interactions = active_interactions[
         active_interactions[ct_pair_cols].apply(lambda row: row.sum() > 0, axis=1)]