def generate_pseudobulk(adata: "AnnData", group_key: str = "identifier", sep="\t", save: str = None) -> pd.DataFrame:
    """
    Generates a pseudobulk for a given key of groups in the AnnData object.

    For every group found in ``adata.obs[group_key]`` the values of ``adata.X``
    are summed per gene over all observations belonging to that group.
    Looks like:

        Genes   group_member_1  group_member_2
        gene_1  value_1         value_2
        gene_2  value_3         value_4

    Args:
        adata: AnnData object
        group_key: The key to group by. E.g. by mice, by condition, ... (default: 'identifier').
            The column may be categorical or not; non-categorical columns are
            converted to categories (sorted unique values) for grouping.
        sep: Separator to use when saving the pseudobulk table (default: '\\t')
        save: Path to save the pseudobulk table to (default: None)

    Returns:
        A Pandas DataFrame containing the pseudobulk table: a ``Genes`` column
        followed by one column per group. Sums are accumulated with an integer
        dtype, so ``adata.X`` is expected to hold counts.

    Raises:
        KeyError: If ``group_key`` is not a column of ``adata.obs``.
    """
    # Local import keeps this function self-contained regardless of the module's imports.
    import numpy as np

    if group_key not in adata.obs.columns:
        raise KeyError(f"group_key {group_key!r} is not a column of adata.obs")

    # astype("category") is a no-op for categorical columns (unused categories
    # are kept) and lets plain string/numeric columns be grouped as well.
    groups = adata.obs.loc[:, group_key].astype("category")

    # Collect all columns first and build the frame once; inserting columns one
    # by one fragments the DataFrame when there are many groups.
    columns = {"Genes": adata.var_names.values}
    for group in groups.cat.categories:
        in_group = groups == group
        gene_sums = adata[in_group].X.sum(0, dtype=int)  # column sums (genes)
        # Sparse matrices return a 2-D (1, n_genes) np.matrix here; flatten so
        # every column is a plain 1-D array of length n_genes.
        columns[group] = np.asarray(gene_sums).ravel()

    pseudobulk = pd.DataFrame(columns)

    if save:
        pseudobulk.to_csv(save, sep=sep, index=False)

    return pseudobulk