From 5edbbfc9957d98baa2a0c03b96c28104b1027b82 Mon Sep 17 00:00:00 2001 From: littlecabiria <71769896+littlecabiria@users.noreply.github.com> Date: Wed, 10 Jul 2024 19:54:03 +1000 Subject: [PATCH] upload SRTsim --- src/methods/SRTsim/config.vsh.yaml | 29 +++++++++++ src/methods/SRTsim/script.R | 78 ++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 src/methods/SRTsim/config.vsh.yaml create mode 100644 src/methods/SRTsim/script.R diff --git a/src/methods/SRTsim/config.vsh.yaml b/src/methods/SRTsim/config.vsh.yaml new file mode 100644 index 00000000..525fd395 --- /dev/null +++ b/src/methods/SRTsim/config.vsh.yaml @@ -0,0 +1,29 @@ +__merge__: ../../api/comp_method.yaml + +name: splatter + +info: + label: Splatter + summary: A single cell RNA-seq data simulator based on a gamma-Poisson distribution. + description: | + The Splat model is a gamma-Poisson distribution used to generate a gene by cell matrix of counts. Mean expression levels for each gene are simulated from a gamma distribution and the Biological Coefficient of Variation is used to enforce a mean-variance trend before counts are simulated from a Poisson distribution. + reference: 10.1186/s13059-017-1305-0 + documentation_url: https://bioconductor.org/packages/devel/bioc/vignettes/splatter/inst/doc/splatter.html + repository_url: https://github.com/Oshlack/splatter + +resources: + - type: r_script + path: script.R + +engines: + - type: docker + image: ghcr.io/openproblems-bio/base_images/r:1.1.0 + setup: + - type: r + bioc: splatter + +runners: + - type: executable + - type: nextflow + directives: + label: [midtime, midmem, midcpu] diff --git a/src/methods/SRTsim/script.R b/src/methods/SRTsim/script.R new file mode 100644 index 00000000..ffc52bc5 --- /dev/null +++ b/src/methods/SRTsim/script.R @@ -0,0 +1,78 @@ +suppressMessages(library(SingleCellExperiment, quietly = TRUE)) +suppressMessages(library(splatter, quietly = TRUE)) + +## VIASH START +par <- list( + input = "resources_test/datasets/MOBNEW/dataset_sp.h5ad", + base = "domain" +) +meta <- list( + name = "splatter" +) +## VIASH END + +cat("Reading input files\n") +input <- anndata::read_h5ad(par$input) + +sce <- SingleCellExperiment( + list(counts = Matrix::t(input$layers[["counts"]])), + colData = input$obs +) + +cat("Splatter simulation start\n") + +set.seed(1) +if (par$base != "domain") { + stop("ONLY domain base") +} + +ordered_indices <- order(colData(sce)$spatial_cluster) +sce_ordered <- sce[, ordered_indices] + +simulated_result <- NULL +for (spatial_cluster in (unique(sce_ordered$spatial_cluster))) { + print(spatial_cluster) + res <- try({ + sce_spatial_cluster <- sce_ordered[, sce_ordered$spatial_cluster == spatial_cluster] + params <- splatter::splatEstimate(as.matrix(counts(sce_spatial_cluster))) + sim_spatial_cluster <- splatter::splatSimulate(params) + sim_spatial_cluster$spatial_cluster <- spatial_cluster + colnames(sim_spatial_cluster) <- paste0(spatial_cluster, colnames(sim_spatial_cluster)) + names(rowData(sim_spatial_cluster)) <- paste(spatial_cluster, names(rowData(sim_spatial_cluster))) + + # combine the cell types + if (is.null(simulated_result)) { + simulated_result <- sim_spatial_cluster + } else { + simulated_result <- SingleCellExperiment::cbind(simulated_result, sim_spatial_cluster) + } + }) +} + +colnames(simulated_result) <- colnames(sce_ordered) +rownames(simulated_result) <- rownames(sce_ordered) + +cat("Generating output\n") + +simulated_result_order <- sce_ordered +counts(simulated_result_order) <- counts(simulated_result) + +simulated_result_order <- simulated_result_order[, match(colnames(sce), colnames(simulated_result_order))] +simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)), ] + +output <- anndata::AnnData( + layers = list( + counts = Matrix::t(counts(simulated_result_order)) + ), + obs = as.data.frame(simulated_result_order@colData), + var = input$var, + uns = c( + input$uns, + list( + method_id = meta$name + ) + ) +) + +cat("Write output files\n") +output$write_h5ad(par$output, compression = "gzip")