From 0cd35fff43b338f02d3808031fbd7800a5dfeb8c Mon Sep 17 00:00:00 2001
From: zethson <lukas.heumos@posteo.net>
Date: Tue, 8 Jun 2021 17:42:06 +0200
Subject: [PATCH] add pseudobulk

Signed-off-by: zethson <lukas.heumos@posteo.net>
---
 sc_toolbox/api/calc/__init__.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/sc_toolbox/api/calc/__init__.py b/sc_toolbox/api/calc/__init__.py
index 33ecac0..9bfb75f 100644
--- a/sc_toolbox/api/calc/__init__.py
+++ b/sc_toolbox/api/calc/__init__.py
@@ -449,6 +449,35 @@ def extended_marker_table(
     return all_markers_df
 
 
+def generate_pseudobulk(adata: AnnData, group_key: str = "identifier", sep="\t", save: str = None) -> pd.DataFrame:
+    """
+    Generates a pseudobulk table by summing the values in adata.X per gene for every group of cells.
+    Looks like:
+      Genes    group_member_1    group_member_2
+    1 gene_1   value_1           value_2
+    2 gene_2   value_3           value_4
+
+    Args:
+        adata: AnnData object
+        group_key: Column of adata.obs to group by, e.g. by mice, by condition, ... (default: 'identifier')
+        sep: Separator to use when saving the pseudobulk table (default: '\\t')
+        save: Path to save the pseudobulk table to; nothing is written when falsy (default: None)
+
+    Returns:
+        A Pandas DataFrame with a 'Genes' column followed by one column of integer sums per group
+    """
+    import numpy as np  # local import: keeps the function self-contained
+
+    groups = adata.obs[group_key].astype("category")  # accept non-categorical columns as well
+    pseudobulk = pd.DataFrame(data=adata.var_names.values, columns=["Genes"])
+    for group in groups.cat.categories:
+        # a scipy sparse X sums to a 2-D (1, n_genes) np.matrix; flatten so it always fits one column
+        pseudobulk[group] = np.asarray(adata[groups == group].X.sum(0, dtype=int)).ravel()
+    if save:
+        pseudobulk.to_csv(save, sep=sep, index=False)
+    return pseudobulk
+
+
 def automated_marker_annotation(
     adata: AnnData,
     organism: str,