Skip to content

Commit

Permalink
Decoupler AUCell var improvement and optional gene sets (#309)
Browse files (browse the repository at this point in the history)
* Var improvement and optional gene sets

* Fix python gene set list handling
  • Loading branch information
pcm32 authored Nov 16, 2023
1 parent c8c39f1 commit 56273bc
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 18 deletions.
17 changes: 11 additions & 6 deletions tools/tertiary-analysis/decoupler/decoupler_aucell_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,19 @@ def run_for_genelists(
if __name__ == "__main__":
# Create command-line arguments parser
parser = argparse.ArgumentParser(description="Score genes using Aucell")
parser.add_argument("--input_file", type=str, help="Path to input AnnData file")
parser.add_argument("--output_file", type=str, help="Path to output file")
parser.add_argument(
"--input_file", type=str, help="Path to input AnnData file", required=True
)
parser.add_argument(
"--output_file", type=str, help="Path to output file", required=True
)
parser.add_argument("--gmt_file", type=str, help="Path to GMT file", required=False)
# add argument for gene sets to score
parser.add_argument(
"--gene_sets_to_score",
type=str,
required=False,
help="Comma separated list of gene sets to score (the need to be in the gmt file)",
help="Optional comma separated list of gene sets to score (the need to be in the gmt file)",
)
# add argument for gene list (comma separated) to score
parser.add_argument(
Expand All @@ -137,6 +141,7 @@ def run_for_genelists(
"--gene_symbols_field",
type=str,
help="Name of the gene symbols field in the AnnData object",
required=True,
)
parser.add_argument("--use_raw", action="store_true", help="Use raw data")
parser.add_argument(
Expand All @@ -149,15 +154,15 @@ def run_for_genelists(
# Load input AnnData object
adata = anndata.read_h5ad(args.input_file)

if args.gene_sets_to_score is not None and args.gmt_file is not None:
if args.gmt_file is not None:
# Load MSigDB file in GMT format
msigdb = read_gmt(args.gmt_file)

gene_sets_to_score = args.gene_sets_to_score.split(",")
gene_sets_to_score = args.gene_sets_to_score.split(",") if args.gene_sets_to_score else []
# Score genes by their ensembl ids using the score_genes_aucell function
for _, row in msigdb.iterrows():
gene_set_name = row["gene_set_name"]
if gene_set_name in gene_sets_to_score:
if not gene_sets_to_score or gene_set_name in gene_sets_to_score:
genes = row["genes"].split(",")
# Convert gene symbols to ensembl ids by using the columns gene_symbols and index in adata.var specific to the gene set
ens_gene_ids = adata.var[
Expand Down
41 changes: 29 additions & 12 deletions tools/tertiary-analysis/decoupler/decoupler_aucell_score.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<?xml version="1.0"?>
<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy0" profile="20.05">
<tool id="score_genes_aucell" name="Decoupler AUCell" version="1.4.0+galaxy1" profile="20.05">
<description>
scores cells using the AUCell method for gene sets.
</description>
Expand All @@ -10,11 +10,13 @@
python '$__tool_directory__/decoupler_aucell_score.py'
--input_file '$input_file'
#if $gene_lists_source.source == "gmt"
--gmt_file '$gmt_file'
--gene_sets_to_score '$gene_sets_to_score'
--gmt_file '$gene_lists_source.gmt_file'
#if $gene_lists_source.gene_sets_to_score
--gene_sets_to_score '$gene_lists_source.gene_sets_to_score'
#end if
#else:
--gene_lists_to_score '$gene_lists_to_score'
--score_names '$score_names'
--gene_lists_to_score '$gene_lists_source.gene_lists_to_score'
--score_names '$gene_lists_source.score_names'
#end if
--gene_symbols_field '$gene_symbols_field'
$use_raw
Expand All @@ -34,7 +36,7 @@
</param>
<when value="gmt">
<param name="gmt_file" type="data" format="txt" label="GMT file with gene sets" />
<param name="gene_sets_to_score" type="text" label="Gene sets to score within the GMT file" />
<param name="gene_sets_to_score" type="text" optional="true" label="Gene sets to score within the GMT file" />
</when>
<when value="enumerated">
<param name="gene_lists_to_score" type="text" label="Genes to score" />
Expand All @@ -56,12 +58,27 @@
<tests>
<test expect_num_outputs="1">
<param name="input_file" value="mito_counted_anndata.h5ad"/>
<param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
<param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
<param name="gene_symbols_field" value="Symbol"/>
<param name="write_anndata" value="true"/>
<conditional name="gene_lists_source">
<param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
<param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
<param name="source" value="gmt"/>
</conditional>
<output name="output_ad">
<assert_contents>
<has_h5_keys keys="obs/AUCell_HALLMARK_NOTCH_SIGNALING"/>
<has_h5_keys keys="obs/AUCell_HALLMARK_APICAL_SURFACE"/>
</assert_contents>
</output>
</test>
<test expect_num_outputs="1">
<param name="input_file" value="mito_counted_anndata.h5ad"/>
<param name="gene_symbols_field" value="Symbol"/>
<param name="write_anndata" value="true"/>
<conditional name="gene_lists_source">
<param name="source" value="gmt"/>
<param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
</conditional>
<output name="output_ad">
<assert_contents>
Expand All @@ -72,12 +89,12 @@
</test>
<test expect_num_outputs="1">
<param name="input_file" value="mito_counted_anndata.h5ad"/>
<param name="gene_lists_to_score" value="Cd8b1,Cd8b2,Cd8a,Cd4,Nrp1,Cd80:Il1a,Il1b,Il6,Nos2,Tlr2,Tlr4,Cd80"/>
<param name="score_names" value="TCell,Macro"/>
<param name="gene_symbols_field" value="Symbol"/>
<param name="write_anndata" value="true"/>
<conditional name="gene_lists_source">
<param name="source" value="enumerated"/>
<param name="gene_lists_to_score" value="Cd8b1,Cd8b2,Cd8a,Cd4,Nrp1,Cd80:Il1a,Il1b,Il6,Nos2,Tlr2,Tlr4,Cd80"/>
<param name="score_names" value="TCell,Macro"/>
</conditional>
<output name="output_ad">
<assert_contents>
Expand All @@ -88,12 +105,12 @@
</test>
<test expect_num_outputs="1">
<param name="input_file" value="mito_counted_anndata.h5ad"/>
<param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
<param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
<param name="gene_symbols_field" value="Symbol"/>
<param name="write_anndata" value="False"/>
<conditional name="gene_lists_source">
<param name="source" value="gmt"/>
<param name="gene_sets_to_score" value="HALLMARK_NOTCH_SIGNALING,HALLMARK_APICAL_SURFACE"/>
<param name="gmt_file" value="mouse_hallmark_ss.gmt"/>
</conditional>
<output name="output_table">
<assert_contents>
Expand Down

0 comments on commit 56273bc

Please sign in to comment.