minor code improvements

openproblems-bio · Nov 20, 2024 · 1d1ffcc · 1d1ffcc
1 parent 69eb17e
commit 1d1ffcc
Show file tree

Hide file tree

Showing 10 changed files with 122 additions and 157 deletions.
diff --git a/src/helpers/utils.R b/src/helpers/utils.R
@@ -18,6 +18,7 @@ generate_moransI <- function(adata) {
   return(res)
 }
 
+
 generate_cosine <- function(real, sim) {
   requireNamespace("lsa", quietly = TRUE)
 
@@ -71,10 +72,9 @@ calculate_recall <- function(real_svg, sim_svg) {
   return(recall)
 }
 
+
 # cell type deconvolution
 CARD_processing <- function(sp_adata, sc_adata){
-  # sp_adata <- input_real_sp
-  # sc_adata <- input_sc
   requireNamespace("MuSiC", quietly = TRUE)
   requireNamespace("CARD", quietly = TRUE)
   spatial_count <- Matrix::t(sp_adata$layers[["counts"]])
@@ -110,6 +110,7 @@ CARD_processing <- function(sp_adata, sc_adata){
 
 }
 
+
 generate_jds <- function(real, sim) {
   common_row_names <- intersect(rownames(real), rownames(sim))
   real_common <- real[common_row_names, , drop = FALSE]
@@ -122,6 +123,7 @@ generate_jds <- function(real, sim) {
   return(average_jsd)
 }
 
+
 generate_rmse <- function(real, sim) {
   common_row_names <- intersect(rownames(real), rownames(sim))
   real_common <- real[common_row_names, , drop = FALSE]
@@ -172,6 +174,7 @@ reclassify_simsce <- function(location, real_cluster, sim_cluster){
 
 }
 
+
 # generate sparial clustering in simulated data
 generate_sim_spatialCluster <- function(real_adata, sim_adata){
   colnames(sim_adata$obs)[colnames(sim_adata$obs) == "col"] <- "array_col"
@@ -195,6 +198,3 @@ generate_sim_spatialCluster <- function(real_adata, sim_adata){
   sim_cluster <- sim_sce$spatial.cluster
   return(sim_cluster)
 }
-
-
-
diff --git a/src/methods/scdesign2/script.R b/src/methods/scdesign2/script.R
@@ -1,5 +1,3 @@
-# requireNamespace("scDesign2", quietly = TRUE)
-
 ## VIASH START
 par <- list(
   input = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
@@ -20,7 +18,7 @@ if (par$base != "domain") {
 }
 
 cat("scDesign2 simulation start\n")
-counts <- as.matrix(Matrix::t(input$layers[["counts"]]))
+counts <- Matrix::t(input$layers[["counts"]])
 colnames(counts) <- as.character(input$obs$spatial_cluster)
 spatial_cluster_prop <- table(input$obs$spatial_cluster)
 
@@ -38,9 +36,6 @@ sim_out <- scDesign2::simulate_count_scDesign2(
   cell_type_prop = prop.table(spatial_cluster_prop)
 )
 
-# colnames(sim_out) <- colnames(t(as.matrix(input$layers[["counts"]])))
-#rownames(sim_out) <- rownames(t(as.matrix(input$layers[["counts"]])))
-
 rownames(sim_out) <- input$var_names
 colnames(sim_out) <- input$obs_names
 

diff --git a/src/methods/sparsim/script.R b/src/methods/sparsim/script.R
@@ -14,67 +14,64 @@ meta <- list(
 
 find_cluster_indices <- function(cluster_column) {
   unique_clusters <- sort(unique(cluster_column))
-  conditions <- list()
-
-  for (i in seq_along(unique_clusters)) {
-    cluster <- unique_clusters[i]
+  conditions <- lapply(unique_clusters, function(cluster) {
     indices <- which(cluster_column == cluster)
-    range_name <- sprintf("cluster_%s_column_index", LETTERS[i])
-    assign(range_name, c(min(indices):max(indices)), envir = .GlobalEnv)
-    conditions[[range_name]] <- get(range_name)
-  }
-
+    seq(min(indices), max(indices))
+  })
+  names(conditions) <- sprintf("cluster_%s_column_index", LETTERS[seq_along(unique_clusters)])
   return(conditions)
 }
 
 cat("Reading input files\n")
 input <- anndata::read_h5ad(par$input)
 
-sce <- SingleCellExperiment::SingleCellExperiment(
-  list(counts = Matrix::t(input$layers[["counts"]])),
-  colData = input$obs
-)
-
-cat("SPARsim simulation start\n")
-
-ordered_indices <- order(SingleCellExperiment::colData(sce)$spatial_cluster)
-sce_ordered <- sce[, ordered_indices]
+cat("SPARSim simulation start\n")
 
 if (par$base != "domain") {
-  stop("ONLY domain base")
+  stop("Error: Only 'domain' base is supported.")
 }
 
-count_matrix <- data.frame(as.matrix(SummarizedExperiment::assay(sce_ordered)))
-sce_scran <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = as.matrix(count_matrix)))
+# Order by spatial cluster
+ordered_indices <- order(input$obs$spatial_cluster)
+input_ordered <- input[ordered_indices]
+
+count_matrix <- as.matrix(Matrix::t(input_ordered$layers[["counts"]]))
+sce_scran <- SingleCellExperiment::SingleCellExperiment(
+  assays = list(counts = count_matrix),
+  colData = input_ordered$obs
+)
 sce_scran <- scran::computeSumFactors(sce_scran, sizes = seq(20, 100, 5), positive = F) 
 
-if (any(sce_scran$sizeFactor <= 0)) {
-  threshold <- 1e-10
-  sce_scran$sizeFactor[sce_scran$sizeFactor <= 0] <- threshold
-}
+# Replace zero or negative size factors with threshold 1e-10
+sce_scran$sizeFactor[sce_scran$sizeFactor <= 0] <- 1e-10
+
+# Perform normalization
 count_matrix_norm <- scater::normalizeCounts(sce_scran, log = FALSE)
-count_matrix_conditions <- find_cluster_indices(sce_ordered@colData$spatial_cluster)
+
+# Find cluster indices for conditions
+count_matrix_conditions <- find_cluster_indices(input_ordered$obs[["spatial_cluster"]])
+
+# Estimate SPARSim parameters
 SPARSim_sim_param <- SPARSim::SPARSim_estimate_parameter_from_data(
   raw_data = count_matrix,
   norm_data = count_matrix_norm,
   conditions = count_matrix_conditions
 )
 
+# Simulate new dataset
 sim_result <- SPARSim::SPARSim_simulation(dataset_parameter = SPARSim_sim_param)
-colnames(sim_result$count_matrix) <- gsub("\\.", "-", colnames(sim_result$count_matrix))
-simulated_result_order <- sce_ordered
-SummarizedExperiment::assays(simulated_result_order, withDimnames = FALSE) <- list(counts = sim_result$count_matrix)
-simulated_result_order <- simulated_result_order[,match(colnames(sce), colnames(simulated_result_order))]
-simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)),]
-new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")])
+
+# Reorder simulated results
+simulated_result_ordered <- sim_result$count_matrix[
+  match(rownames(sim_result$count_matrix), rownames(input_ordered$var)),
+  match(colnames(sim_result$count_matrix), rownames(input_ordered$obs))
+]
 
 cat("Generating output\n")
 output <- anndata::AnnData(
-  layers = list(
-    counts = Matrix::t(SingleCellExperiment::counts(simulated_result_order))
-  ),
-  obs = new_obs,
-  var = input$var,
+  layers = list(counts = t(simulated_result_ordered)),
+  obs = input_ordered$obs[c("row", "col")],
+  var = input_ordered$var,
   uns = c(
     input$uns,
     list(
@@ -83,6 +80,5 @@ output <- anndata::AnnData(
   )
 )
 
-cat("Write output files\n")
-output$write_h5ad(par$output, compression = "gzip")
-
+cat("Writing output files\n")
+output$write_h5ad(par$output, compression = "gzip")
diff --git a/src/methods/splatter/script.R b/src/methods/splatter/script.R
@@ -3,7 +3,7 @@ suppressMessages(library(splatter, quietly = TRUE))
 
 ## VIASH START
 par <- list(
-  input = "resources_test/spatialsimbench_mobnew/MOBNEW.rds",
+  input = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
   base = "domain"
 )
 meta <- list(
@@ -14,26 +14,20 @@ meta <- list(
 cat("Reading input files\n")
 input <- anndata::read_h5ad(par$input)
 
-sce <- SingleCellExperiment(
-  list(counts = Matrix::t(input$layers[["counts"]])),
-  colData = input$obs
-)
-
 cat("Splatter simulation start\n")
 
 if (par$base != "domain") {
   stop("ONLY domain base")
 }
 
-ordered_indices <- order(colData(sce)$spatial_cluster)
-sce_ordered <- sce[, ordered_indices]
+ordered_indices <- order(input$obs$spatial_cluster)
+input_ordered <- input[ordered_indices]
 
 simulated_result <- NULL
-for (spatial_cluster in (unique(sce_ordered$spatial_cluster))) {
-  print(spatial_cluster)
+for (spatial_cluster in unique(input_ordered$obs[["spatial_cluster"]])) {
   res <- try({
-    sce_spatial_cluster <- sce_ordered[, sce_ordered$spatial_cluster == spatial_cluster]
-    params <- splatter::splatEstimate(as.matrix(counts(sce_spatial_cluster)))
+    input_spatial_cluster <- input_ordered[input_ordered$obs[["spatial_cluster"]] == spatial_cluster]
+    params <- splatter::splatEstimate(as.matrix(t(input_spatial_cluster$layers[["counts"]])))
     sim_spatial_cluster <- splatter::splatSimulate(params)
     sim_spatial_cluster$spatial_cluster <- spatial_cluster
     colnames(sim_spatial_cluster) <- paste0(spatial_cluster, colnames(sim_spatial_cluster))
@@ -48,24 +42,19 @@ for (spatial_cluster in (unique(sce_ordered$spatial_cluster))) {
   })
 }
 
-colnames(simulated_result) <- colnames(sce_ordered)
-rownames(simulated_result) <- rownames(sce_ordered)
-
-cat("Generating output\n")
-
-simulated_result_order <- sce_ordered
-counts(simulated_result_order) <- counts(simulated_result)
+colnames(simulated_result) <- rownames(input_ordered$obs)
+rownames(simulated_result) <- rownames(input_ordered$var)
 
-simulated_result_order <- simulated_result_order[, match(colnames(sce), colnames(simulated_result_order))]
-simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)), ]
-new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")])
+simulated_result_ordered <- counts(simulated_result)[
+  match(rownames(counts(simulated_result)), rownames(input_ordered$var)),
+  match(colnames(counts(simulated_result)), rownames(input_ordered$obs))
+]
 
+cat("Generating output\n")
 output <- anndata::AnnData(
-  layers = list(
-    counts = Matrix::t(counts(simulated_result_order))
-  ),
-  obs = new_obs,
-  var = input$var,
+  layers = list(counts = t(simulated_result_ordered)),
+  obs = input_ordered$obs[c("row", "col")],
+  var = input_ordered$var,
   uns = c(
     input$uns,
     list(

diff --git a/src/methods/symsim/script.R b/src/methods/symsim/script.R
@@ -3,7 +3,7 @@ suppressMessages(library(SymSim, quietly = TRUE))
 
 ## VIASH START
 par <- list(
-  input = "resources_test/spatialsimbench_mobnew/MOBNEW.rds",
+  input = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
   base = "domain"
 )
 meta <- list(
@@ -14,11 +14,6 @@ meta <- list(
 cat("Reading input files\n")
 input <- anndata::read_h5ad(par$input)
 
-sce <- SingleCellExperiment(
-  list(counts = Matrix::t(input$layers[["counts"]])),
-  colData = input$obs
-)
-
 cat("SymSim simulation start\n")
 
 if (par$base != "domain") {
@@ -28,28 +23,27 @@ if (par$base != "domain") {
 simulated_result <- NULL
 tech <-  "UMI"
 
-ordered_indices <- order(colData(sce)$spatial_cluster)
-sce_ordered <- sce[, ordered_indices]
+ordered_indices <- order(input$obs$spatial_cluster)
+input_ordered <- input[ordered_indices]
 
-for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
+for (thisSpatialCluster in unique(input_ordered$obs[["spatial_cluster"]])) {
   res <- try({
-    # subset to one cell type
-    sce_thiscelltype <- sce_ordered[, sce_ordered$spatial_cluster == thisSpatialCluster]
+    input_thiscelltype <- input_ordered[input_ordered$obs[["spatial_cluster"]] == thisSpatialCluster]
 
     # this is because if some genes are 0 , this will cause error in simulation
-    keep_feature <- rowSums(counts(sce_thiscelltype) > 0) > 0
-    sce_thiscelltype_f <- sce_thiscelltype[keep_feature, ]
+    keep_feature <- colSums(input_thiscelltype$layers[["counts"]] > 0) > 0
+    input_thiscelltype_f <- input_thiscelltype[, keep_feature]
 
     best_matches_UMI <- BestMatchParams(
       tech = "UMI",
-      counts = as.matrix(counts(sce_thiscelltype_f)),
+      counts = as.matrix(t(input_thiscelltype_f$layers[["counts"]])),
       plotfilename = "best_params.umi.qqplot",
       n_optimal = 1
     )
 
     sim_thiscelltype <- SimulateTrueCounts(
-      ncells_total =  dim(sce_thiscelltype)[2],
-      ngenes =  dim(sce_thiscelltype)[1],
+      ncells_total =  dim(input_thiscelltype)[1],
+      ngenes =  dim(input_thiscelltype)[2],
       evf_type = "one.population",
       randseed = 1,
       Sigma = best_matches_UMI$Sigma[1],
@@ -60,7 +54,7 @@ for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
       mean_hge = best_matches_UMI$mean_hge[1]
     )
 
-    gene_len <- sample(gene_len_pool, dim(sce_thiscelltype)[1], replace = FALSE)
+    gene_len <- sample(gene_len_pool, dim(input_thiscelltype)[2], replace = FALSE)
     sim_thiscelltype <- True2ObservedCounts(
       true_counts = sim_thiscelltype[[1]],
       meta_cell = sim_thiscelltype[[3]],
@@ -73,7 +67,7 @@ for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
     )
 
     # tidy up the names
-    sim_thiscelltype <- SingleCellExperiment(list(counts = sim_thiscelltype$counts))
+    sim_thiscelltype <- SingleCellExperiment(list(counts = as.matrix(sim_thiscelltype$counts)))
     sim_thiscelltype$spatial_cluster <- thisSpatialCluster
 
     # combine the cell types
@@ -86,24 +80,21 @@ for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
   })
 }
 
-colnames(simulated_result) <- colnames(sce_ordered)
-rownames(simulated_result) <- rownames(sce_ordered)
+colnames(simulated_result) <- rownames(input_ordered$obs)
+rownames(simulated_result) <- rownames(input_ordered$var)
 
-simulated_result_order <- sce_ordered
-counts(simulated_result_order) <- counts(simulated_result)
-
-simulated_result_order <- simulated_result_order[, match(colnames(sce), colnames(simulated_result_order))]
-simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)), ]
-new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")])
+simulated_result_ordered <- counts(simulated_result)[
+  match(rownames(counts(simulated_result)), rownames(input_ordered$var)),
+  match(colnames(counts(simulated_result)), rownames(input_ordered$obs))
+]
 
 cat("Generating output\n")
-
 output <- anndata::AnnData(
   layers = list(
-    counts = Matrix::t(counts(simulated_result_order))
+    counts = Matrix::t(simulated_result_ordered)
   ),
-  obs = new_obs,
-  var = input$var,
+  obs = input_ordered$obs[c("row", "col")],
+  var = input_ordered$var,
   uns = c(
     input$uns,
     list(