From 4990a52ac6fd3d97434a24051e5828c86c8a802c Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Tue, 20 Aug 2024 13:26:27 +0100 Subject: [PATCH] Adds PySCENIC wrappers (#328) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * pyscenic grn passing tests * Minor improvements to grn * Shed file for pyscenic * ctx passing tests (test files unavailable for CI though) * AUCell passing tests locally * Cosmetic changes * Apply suggestions from bgruening's code review Co-authored-by: Björn Grüning * Formatting and others * Test data, formatting, arboretum option * Fix tests * ctx missing documentation * Use macros * Boolean variables * Tool version macro and missing boolean * Apply suggestions from Björn's code review Co-authored-by: Björn Grüning * Hopefully fixes test for ctx * Missing boolean changes --------- Co-authored-by: Björn Grüning --- tools/tertiary-analysis/pyscenic/.shed.yml | 21 +++ .../pyscenic/get_test_data.sh | 28 ++++ tools/tertiary-analysis/pyscenic/macros.xml | 15 ++ .../pyscenic/pyscenic_aucell.xml | 104 +++++++++++++ .../pyscenic/pyscenic_ctx.xml | 139 ++++++++++++++++++ .../pyscenic/pyscenic_grn.xml | 98 ++++++++++++ 6 files changed, 405 insertions(+) create mode 100644 tools/tertiary-analysis/pyscenic/.shed.yml create mode 100644 tools/tertiary-analysis/pyscenic/get_test_data.sh create mode 100644 tools/tertiary-analysis/pyscenic/macros.xml create mode 100644 tools/tertiary-analysis/pyscenic/pyscenic_aucell.xml create mode 100644 tools/tertiary-analysis/pyscenic/pyscenic_ctx.xml create mode 100644 tools/tertiary-analysis/pyscenic/pyscenic_grn.xml diff --git a/tools/tertiary-analysis/pyscenic/.shed.yml b/tools/tertiary-analysis/pyscenic/.shed.yml new file mode 100644 index 00000000..f4c05268 --- /dev/null +++ b/tools/tertiary-analysis/pyscenic/.shed.yml @@ -0,0 +1,21 @@ +categories: + - Transcriptomics + - RNA + - Sequence Analysis +description: "PySCENIC scripts based on usage at 
https://pyscenic.readthedocs.io/" +long_description: | + pySCENIC is a lightning-fast python implementation of the SCENIC pipeline (Single-Cell rEgulatory Network Inference and Clustering) + which enables biologists to infer transcription factors, gene regulatory networks and cell types from single-cell RNA-seq data. +name: suite_pyscenic +owner: ebi-gxa +remote_repository_url: https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ +type: unrestricted +auto_tool_repositories: + name_template: "{{ tool_id }}" + description_template: "Wrapper for the pySCENIC tool suite: {{ tool_name }}" +suite: + name: "suite_pyscenic" + description: "PySCENIC scripts based on usage at https://pyscenic.readthedocs.io/" + long_description: | + pySCENIC is a lightning-fast python implementation of the SCENIC pipeline (Single-Cell rEgulatory Network Inference and Clustering) + which enables biologists to infer transcription factors, gene regulatory networks and cell types from single-cell RNA-seq data.
diff --git a/tools/tertiary-analysis/pyscenic/get_test_data.sh b/tools/tertiary-analysis/pyscenic/get_test_data.sh
new file mode 100644
index 00000000..5cf84555
--- /dev/null
+++ b/tools/tertiary-analysis/pyscenic/get_test_data.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+#
+# Downloads the pySCENIC test data files listed below into ./test-data
+# (relative to the current directory).
+# Files that already exist locally are not downloaded again.
+set -uo pipefail
+
+readonly TF_DATA_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/allTFs_hg38.txt'
+readonly MOTIF2TF_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/motifs.tbl'
+readonly RANKING_LINK='https://zenodo.org/records/13328724/files/genome-ranking_v2.feather'
+readonly LOOM_INPUT_LINK='https://raw.githubusercontent.com/aertslab/scenic-nf/master/example/expr_mat.loom'
+
+readonly REGULONS_LINK='https://zenodo.org/records/13328724/files/regulons.tsv'
+readonly TF2TARGETS_LINK='https://zenodo.org/records/13328724/files/tf2targets.tsv'
+
+#######################################
+# Downloads a file unless it is already present locally.
+# Arguments:
+#   $1 - URL to download from
+#   $2 - destination file name
+# Outputs:
+#   A progress message on stdout; an error message on stderr on failure.
+# Returns:
+#   0 if the file exists or was downloaded, 1 if the download failed.
+#######################################
+function get_data {
+  local link=$1
+  local fname=$2
+  local partial="${fname}.part"
+
+  if [[ -f "$fname" ]]; then
+    return 0
+  fi
+
+  echo "$fname not available locally, downloading.."
+  # wget -O creates the output file even when the transfer fails, so fetch
+  # into a temporary name and move it into place only on success; otherwise
+  # a truncated file would pass the existence check above on the next run.
+  if ! wget -O "$partial" --retry-connrefused --waitretry=1 \
+    --read-timeout=20 --timeout=15 -t 3 "$link"; then
+    rm -f -- "$partial"
+    echo "ERROR: could not download $link" >&2
+    return 1
+  fi
+  mv -- "$partial" "$fname"
+}
+
+# get matrix data
+mkdir -p test-data || exit 1
+# Stop if the directory cannot be entered so files never land elsewhere.
+pushd test-data || exit 1
+
+# Try every file, then report overall failure through the exit status.
+status=0
+get_data "$TF_DATA_LINK" "allTFs_hg38.txt" || status=1
+get_data "$MOTIF2TF_LINK" "motifs.tbl" || status=1
+get_data "$RANKING_LINK" "genome-ranking_v2.feather" || status=1
+get_data "$LOOM_INPUT_LINK" "expr_mat.loom" || status=1
+get_data "$REGULONS_LINK" "regulons.tsv" || status=1
+get_data "$TF2TARGETS_LINK" "tf2targets.tsv" || status=1
+exit "$status"
\ No newline at end of file
diff --git a/tools/tertiary-analysis/pyscenic/macros.xml b/tools/tertiary-analysis/pyscenic/macros.xml new file mode 100644 index 00000000..1979d46e --- /dev/null +++ b/tools/tertiary-analysis/pyscenic/macros.xml @@ -0,0 +1,15 @@ + + 0.12.1 + + + + aertslab/pyscenic:@TOOL_VERSION@ + + + + + + 10.1038/nmeth.4463 + + + diff --git a/tools/tertiary-analysis/pyscenic/pyscenic_aucell.xml b/tools/tertiary-analysis/pyscenic/pyscenic_aucell.xml new file mode 100644 index 00000000..e9c079ba --- /dev/null +++ b/tools/tertiary-analysis/pyscenic/pyscenic_aucell.xml @@ -0,0 +1,104 @@ + + calculates AUCell to find relevant regulons/gene sets + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/tertiary-analysis/pyscenic/pyscenic_ctx.xml b/tools/tertiary-analysis/pyscenic/pyscenic_ctx.xml new file mode 100644 index 00000000..82abf371 --- /dev/null +++ b/tools/tertiary-analysis/pyscenic/pyscenic_ctx.xml @@ -0,0 +1,139 @@ + + + computes active regulons based on a gene regulatory network + + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tools/tertiary-analysis/pyscenic/pyscenic_grn.xml b/tools/tertiary-analysis/pyscenic/pyscenic_grn.xml new file mode 100644 index 00000000..35e84632 --- /dev/null +++ b/tools/tertiary-analysis/pyscenic/pyscenic_grn.xml @@ -0,0 +1,98 @@ + + infers gene regulatory networks + + macros.xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +