Skip to content

Commit

Permalink
Test data, formatting, arboretum option
Browse files Browse the repository at this point in the history
  • Loading branch information
pcm32 committed Aug 16, 2024
1 parent e075901 commit 2f2e46e
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 20 deletions.
14 changes: 8 additions & 6 deletions tools/tertiary-analysis/pyscenic/get_test_data.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#!/usr/bin/env bash
TF_DATA_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/allTFs_hg38.txt'
MOTIF2TF_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/motifs.tbl'
RANKING_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/genome-ranking.feather'
RANKING_LINK='https://zenodo.org/records/13328724/files/genome-ranking_v2.feather'
LOOM_INPUT_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/expr_mat.loom'

REGULONS_LINK='https://zenodo.org/records/13328724/files/regulons.tsv'
TF2TARGETS_LINK='https://zenodo.org/records/13328724/files/tf2targets.tsv'

function get_data {
local link=$1
local fname=$2
Expand All @@ -18,9 +21,8 @@ function get_data {
# Fetch every test fixture into ./test-data (created on demand).
# get_data takes a link ($1) and a target file name ($2); the rest of its body
# is collapsed in this view, so the exact download behaviour should be confirmed.
# NOTE(review): the *_LINK expansions below are unquoted; that is only safe
# while the URLs contain no whitespace or glob characters.
mkdir -p test-data
# NOTE(review): no matching popd appears in the visible lines.
pushd test-data
get_data $TF_DATA_LINK "allTFs_hg38.txt"
#unzip mtx.zip
#rm -f mtx.zip

get_data $MOTIF2TF_LINK "motifs.tbl"
get_data $RANKING_LINK "genome-ranking.feather"
get_data $LOOM_INPUT_LINK "expr_mat.loom"
get_data $RANKING_LINK "genome-ranking_v2.feather"
get_data $LOOM_INPUT_LINK "expr_mat.loom"
get_data $REGULONS_LINK regulons.tsv
get_data $TF2TARGETS_LINK tf2targets.tsv
2 changes: 1 addition & 1 deletion tools/tertiary-analysis/pyscenic/pyscenic_aucell.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
<param name="nes_threshold" type="float" label="NES Threshold" help="The Normalized Enrichment Score (NES) threshold for finding enriched features (default: 3.0)." optional="true"/>
<param name="cell_id_attribute" type="text" label="Cell ID Attribute" help="The name of the column attribute that specifies the identifiers of the cells in the loom file." optional="true"/>
<param name="gene_attribute" type="text" label="Gene Attribute" help="The name of the row attribute that specifies the gene symbols in the loom file." optional="true"/>
<param name="sparse" type="boolean" label="Sparse Matrix" truevalue="--sparse" falsevalue="" help="If set, load the expression data as a sparse matrix. Currently applies to the grn inference step only." optional="true"/>
<param name="sparse" type="boolean" label="Sparse Matrix" truevalue="--sparse" falsevalue="" help="If set, load the expression data as a sparse matrix. Currently applies to the grn inference step only."/>
</inputs>
<outputs>
<data name="output" format="tsv" label="${tool.name} on ${on_string}: AUCell scores for regulons or gene sets."/>
Expand Down
7 changes: 3 additions & 4 deletions tools/tertiary-analysis/pyscenic/pyscenic_ctx.xml
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,8 @@
#if $gene_attribute
--gene_attribute '${gene_attribute}'
#end if
#if $sparse
--sparse
#end if
$sparse
&&
mv regulons.tsv '${output}'
Expand All @@ -97,7 +96,7 @@ mv regulons.tsv '${output}'
<param type="boolean" name="mask_dropouts" label="Mask Dropouts" optional="true" help="Mask dropouts in the expression matrix."/>
<param type="text" name="cell_id_attribute" label="Cell ID Attribute" optional="true" help="The name of the attribute in the loom expression matrix that contains cell IDs."/>
<param type="text" name="gene_attribute" label="Gene Attribute" optional="true" help="The name of the attribute in the loom expression matrix that contains gene names."/>
<param type="boolean" name="sparse" label="Sparse" optional="true" help="Use a sparse matrix for the gene regulatory network inference step."/>
<param name="sparse" type="boolean" label="Sparse Matrix" truevalue="--sparse" falsevalue="" help="If set, load the expression data as a sparse matrix. Currently applies to the grn inference step only."/>
</inputs>
<outputs>
<data name="output" format="tabular" label="${tool.name} on ${on_string}: table of enriched motifs and target genes"/>
Expand Down
34 changes: 25 additions & 9 deletions tools/tertiary-analysis/pyscenic/pyscenic_grn.xml
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
<tool id="pyscenic_grn" name="PySCENIC GRN" version="0.12.1+galaxy0" profile="21.09">
<description>Gene regulatory network inference using PySCENIC</description>
<description>infers gene regulatory networks</description>
<requirements>
<container type="docker">
aertslab/pyscenic:0.12.1
</container>
</requirements>
<command detect_errors="exit_code">
<![CDATA[
ln -s '${expression_mtx}' expr_mat.loom &&
ln -s '${tfs_fname}' tfs.txt &&
pyscenic grn
ln -s '${expression_mtx}' expr_mat.loom &&
ln -s '${tfs_fname}' tfs.txt &&
#if $use_arboretum
arboreto_with_multiprocessing.py
#else
pyscenic grn
#end if
-o tf2targets.tsv
#if $transpose
-t
Expand All @@ -27,24 +31,24 @@
#if $gene_attribute
--gene_attribute '${gene_attribute}'
#end if
#if $sparse
--sparse
#end if
$sparse
expr_mat.loom tfs.txt &&
mv tf2targets.tsv '${tf2targets}'
]]>
</command>
<inputs>
<param name="expression_mtx" type="data" format="loom" label="Expression Matrix Loom File" help="In format rows=genes x columns=cells"/>
<param name="tfs_fname" type="data" format="txt" label="Transcription Factors File" help="Simple text file, one transcription factor symbol per line"/>
<param name="transpose" type="boolean" default_value="false" label="Transpose Expression Matrix"/>
<param name="use_arboretum" type="boolean" label="Use arboretum" checked="false" help="Uses the arboretum approach instead of pyscenic grn call, which can be better for multi processing"/>
<!-- Boolean toggle, unchecked by default; when set, the command template adds -t (see the "#if $transpose" branch). -->
<param name="transpose" type="boolean" checked="false" label="Transpose Expression Matrix" help="Use this if the matrix is cell x genes instead of genes x cells as expected"/>
<param name="method" type="select" label="Method">
<option value="genie3">GENIE3</option>
<option value="grnboost2" selected="true">GRNBoost2</option>
</param>
<param name="cell_id_attribute" type="text" optional="true" label="Cell ID Attribute"/>
<param name="gene_attribute" type="text" optional="true" label="Gene Attribute"/>
<param name="sparse" type="boolean" optional="true" label="Load as Sparse Matrix"/>
<param name="sparse" type="boolean" label="Sparse Matrix" truevalue="--sparse" falsevalue="" help="If set, load the expression data as a sparse matrix. Currently applies to the grn inference step only."/>
<param name="seed" type="integer" optional="true" label="Seed"/>
</inputs>
<outputs>
Expand All @@ -62,6 +66,18 @@
</assert_contents>
</output>
</test>
<!-- Exercises the use_arboretum=true branch of the command template (arboreto_with_multiprocessing.py instead of "pyscenic grn") with a fixed seed of 1. -->
<test expect_num_outputs="1">
<param name="expression_mtx" value="expr_mat.loom"/>
<param name="tfs_fname" value="allTFs_hg38.txt"/>
<param name="use_arboretum" value="true"/>
<param name="seed" value="1"/>
<!-- compare="sim_size" with delta_frac="0.2": the output is checked against the reference by file size, allowing a 20% relative difference. -->
<!-- NOTE(review): get_test_data.sh in this commit fetches tf2targets.tsv; no download of tf2targets_arboretum.tsv is visible. Confirm the reference file exists in test-data. -->
<output name="tf2targets" file="tf2targets_arboretum.tsv" compare="sim_size" delta_frac="0.2">
<!-- file="tf2targets.tsv" compare="sim_size" delta_frac="0.2"/> -->
<assert_contents>
<!-- Exact line count expected in the output table. -->
<has_n_lines n="1006973"/>
</assert_contents>
</output>
</test>
</tests>
<help><![CDATA[
This tool runs the `pyscenic grn` command to infer gene regulatory networks.
Expand Down

0 comments on commit 2f2e46e

Please sign in to comment.