Skip to content

Commit

Permalink
minor code improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
sainirmayi committed Nov 20, 2024
1 parent 69eb17e commit 1d1ffcc
Show file tree
Hide file tree
Showing 10 changed files with 122 additions and 157 deletions.
10 changes: 5 additions & 5 deletions src/helpers/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ generate_moransI <- function(adata) {
return(res)
}


generate_cosine <- function(real, sim) {
requireNamespace("lsa", quietly = TRUE)

Expand Down Expand Up @@ -71,10 +72,9 @@ calculate_recall <- function(real_svg, sim_svg) {
return(recall)
}


# cell type deconvolution
CARD_processing <- function(sp_adata, sc_adata){
# sp_adata <- input_real_sp
# sc_adata <- input_sc
requireNamespace("MuSiC", quietly = TRUE)
requireNamespace("CARD", quietly = TRUE)
spatial_count <- Matrix::t(sp_adata$layers[["counts"]])
Expand Down Expand Up @@ -110,6 +110,7 @@ CARD_processing <- function(sp_adata, sc_adata){

}


generate_jds <- function(real, sim) {
common_row_names <- intersect(rownames(real), rownames(sim))
real_common <- real[common_row_names, , drop = FALSE]
Expand All @@ -122,6 +123,7 @@ generate_jds <- function(real, sim) {
return(average_jsd)
}


generate_rmse <- function(real, sim) {
common_row_names <- intersect(rownames(real), rownames(sim))
real_common <- real[common_row_names, , drop = FALSE]
Expand Down Expand Up @@ -172,6 +174,7 @@ reclassify_simsce <- function(location, real_cluster, sim_cluster){

}


# generate sparial clustering in simulated data
generate_sim_spatialCluster <- function(real_adata, sim_adata){
colnames(sim_adata$obs)[colnames(sim_adata$obs) == "col"] <- "array_col"
Expand All @@ -195,6 +198,3 @@ generate_sim_spatialCluster <- function(real_adata, sim_adata){
sim_cluster <- sim_sce$spatial.cluster
return(sim_cluster)
}



7 changes: 1 addition & 6 deletions src/methods/scdesign2/script.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# requireNamespace("scDesign2", quietly = TRUE)

## VIASH START
par <- list(
input = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
Expand All @@ -20,7 +18,7 @@ if (par$base != "domain") {
}

cat("scDesign2 simulation start\n")
counts <- as.matrix(Matrix::t(input$layers[["counts"]]))
counts <- Matrix::t(input$layers[["counts"]])
colnames(counts) <- as.character(input$obs$spatial_cluster)
spatial_cluster_prop <- table(input$obs$spatial_cluster)

Expand All @@ -38,9 +36,6 @@ sim_out <- scDesign2::simulate_count_scDesign2(
cell_type_prop = prop.table(spatial_cluster_prop)
)

# colnames(sim_out) <- colnames(t(as.matrix(input$layers[["counts"]])))
#rownames(sim_out) <- rownames(t(as.matrix(input$layers[["counts"]])))

rownames(sim_out) <- input$var_names
colnames(sim_out) <- input$obs_names

Expand Down
76 changes: 36 additions & 40 deletions src/methods/sparsim/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,64 @@ meta <- list(

find_cluster_indices <- function(cluster_column) {
unique_clusters <- sort(unique(cluster_column))
conditions <- list()

for (i in seq_along(unique_clusters)) {
cluster <- unique_clusters[i]
conditions <- lapply(unique_clusters, function(cluster) {
indices <- which(cluster_column == cluster)
range_name <- sprintf("cluster_%s_column_index", LETTERS[i])
assign(range_name, c(min(indices):max(indices)), envir = .GlobalEnv)
conditions[[range_name]] <- get(range_name)
}

seq(min(indices), max(indices))
})
names(conditions) <- sprintf("cluster_%s_column_index", LETTERS[seq_along(unique_clusters)])
return(conditions)
}

cat("Reading input files\n")
input <- anndata::read_h5ad(par$input)

sce <- SingleCellExperiment::SingleCellExperiment(
list(counts = Matrix::t(input$layers[["counts"]])),
colData = input$obs
)

cat("SPARsim simulation start\n")

ordered_indices <- order(SingleCellExperiment::colData(sce)$spatial_cluster)
sce_ordered <- sce[, ordered_indices]
cat("SPARSim simulation start\n")

if (par$base != "domain") {
stop("ONLY domain base")
stop("Error: Only 'domain' base is supported.")
}

count_matrix <- data.frame(as.matrix(SummarizedExperiment::assay(sce_ordered)))
sce_scran <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = as.matrix(count_matrix)))
# Order by spatial cluster
ordered_indices <- order(input$obs$spatial_cluster)
input_ordered <- input[ordered_indices]

count_matrix <- as.matrix(Matrix::t(input_ordered$layers[["counts"]]))
sce_scran <- SingleCellExperiment::SingleCellExperiment(
assays = list(counts = count_matrix),
colData = input_ordered$obs
)
sce_scran <- scran::computeSumFactors(sce_scran, sizes = seq(20, 100, 5), positive = F)

if (any(sce_scran$sizeFactor <= 0)) {
threshold <- 1e-10
sce_scran$sizeFactor[sce_scran$sizeFactor <= 0] <- threshold
}
# Replace zero or negative size factors with threshold 1e-10
sce_scran$sizeFactor[sce_scran$sizeFactor <= 0] <- 1e-10

# Perform normalization
count_matrix_norm <- scater::normalizeCounts(sce_scran, log = FALSE)
count_matrix_conditions <- find_cluster_indices(sce_ordered@colData$spatial_cluster)

# Find cluster indices for conditions
count_matrix_conditions <- find_cluster_indices(input_ordered$obs[["spatial_cluster"]])

# Estimate SPARSim parameters
SPARSim_sim_param <- SPARSim::SPARSim_estimate_parameter_from_data(
raw_data = count_matrix,
norm_data = count_matrix_norm,
conditions = count_matrix_conditions
)

# Simulate new dataset
sim_result <- SPARSim::SPARSim_simulation(dataset_parameter = SPARSim_sim_param)
colnames(sim_result$count_matrix) <- gsub("\\.", "-", colnames(sim_result$count_matrix))
simulated_result_order <- sce_ordered
SummarizedExperiment::assays(simulated_result_order, withDimnames = FALSE) <- list(counts = sim_result$count_matrix)
simulated_result_order <- simulated_result_order[,match(colnames(sce), colnames(simulated_result_order))]
simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)),]
new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")])

# Reorder simulated results
simulated_result_ordered <- sim_result$count_matrix[
match(rownames(sim_result$count_matrix), rownames(input_ordered$var)),
match(colnames(sim_result$count_matrix), rownames(input_ordered$obs))
]

cat("Generating output\n")
output <- anndata::AnnData(
layers = list(
counts = Matrix::t(SingleCellExperiment::counts(simulated_result_order))
),
obs = new_obs,
var = input$var,
layers = list(counts = t(simulated_result_ordered)),
obs = input_ordered$obs[c("row", "col")],
var = input_ordered$var,
uns = c(
input$uns,
list(
Expand All @@ -83,6 +80,5 @@ output <- anndata::AnnData(
)
)

cat("Write output files\n")
output$write_h5ad(par$output, compression = "gzip")

cat("Writing output files\n")
output$write_h5ad(par$output, compression = "gzip")
43 changes: 16 additions & 27 deletions src/methods/splatter/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ suppressMessages(library(splatter, quietly = TRUE))

## VIASH START
par <- list(
input = "resources_test/spatialsimbench_mobnew/MOBNEW.rds",
input = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
base = "domain"
)
meta <- list(
Expand All @@ -14,26 +14,20 @@ meta <- list(
cat("Reading input files\n")
input <- anndata::read_h5ad(par$input)

sce <- SingleCellExperiment(
list(counts = Matrix::t(input$layers[["counts"]])),
colData = input$obs
)

cat("Splatter simulation start\n")

if (par$base != "domain") {
stop("ONLY domain base")
}

ordered_indices <- order(colData(sce)$spatial_cluster)
sce_ordered <- sce[, ordered_indices]
ordered_indices <- order(input$obs$spatial_cluster)
input_ordered <- input[ordered_indices]

simulated_result <- NULL
for (spatial_cluster in (unique(sce_ordered$spatial_cluster))) {
print(spatial_cluster)
for (spatial_cluster in unique(input_ordered$obs[["spatial_cluster"]])) {
res <- try({
sce_spatial_cluster <- sce_ordered[, sce_ordered$spatial_cluster == spatial_cluster]
params <- splatter::splatEstimate(as.matrix(counts(sce_spatial_cluster)))
input_spatial_cluster <- input_ordered[input_ordered$obs[["spatial_cluster"]] == spatial_cluster]
params <- splatter::splatEstimate(as.matrix(t(input_spatial_cluster$layers[["counts"]])))
sim_spatial_cluster <- splatter::splatSimulate(params)
sim_spatial_cluster$spatial_cluster <- spatial_cluster
colnames(sim_spatial_cluster) <- paste0(spatial_cluster, colnames(sim_spatial_cluster))
Expand All @@ -48,24 +42,19 @@ for (spatial_cluster in (unique(sce_ordered$spatial_cluster))) {
})
}

colnames(simulated_result) <- colnames(sce_ordered)
rownames(simulated_result) <- rownames(sce_ordered)

cat("Generating output\n")

simulated_result_order <- sce_ordered
counts(simulated_result_order) <- counts(simulated_result)
colnames(simulated_result) <- rownames(input_ordered$obs)
rownames(simulated_result) <- rownames(input_ordered$var)

simulated_result_order <- simulated_result_order[, match(colnames(sce), colnames(simulated_result_order))]
simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)), ]
new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")])
simulated_result_ordered <- counts(simulated_result)[
match(rownames(counts(simulated_result)), rownames(input_ordered$var)),
match(colnames(counts(simulated_result)), rownames(input_ordered$obs))
]

cat("Generating output\n")
output <- anndata::AnnData(
layers = list(
counts = Matrix::t(counts(simulated_result_order))
),
obs = new_obs,
var = input$var,
layers = list(counts = t(simulated_result_ordered)),
obs = input_ordered$obs[c("row", "col")],
var = input_ordered$var,
uns = c(
input$uns,
list(
Expand Down
51 changes: 21 additions & 30 deletions src/methods/symsim/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ suppressMessages(library(SymSim, quietly = TRUE))

## VIASH START
par <- list(
input = "resources_test/spatialsimbench_mobnew/MOBNEW.rds",
input = "resources_test/spatialsimbench_mobnew/dataset_sp.h5ad",
base = "domain"
)
meta <- list(
Expand All @@ -14,11 +14,6 @@ meta <- list(
cat("Reading input files\n")
input <- anndata::read_h5ad(par$input)

sce <- SingleCellExperiment(
list(counts = Matrix::t(input$layers[["counts"]])),
colData = input$obs
)

cat("SymSim simulation start\n")

if (par$base != "domain") {
Expand All @@ -28,28 +23,27 @@ if (par$base != "domain") {
simulated_result <- NULL
tech <- "UMI"

ordered_indices <- order(colData(sce)$spatial_cluster)
sce_ordered <- sce[, ordered_indices]
ordered_indices <- order(input$obs$spatial_cluster)
input_ordered <- input[ordered_indices]

for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
for (thisSpatialCluster in unique(input_ordered$obs[["spatial_cluster"]])) {
res <- try({
# subset to one cell type
sce_thiscelltype <- sce_ordered[, sce_ordered$spatial_cluster == thisSpatialCluster]
input_thiscelltype <- input_ordered[input_ordered$obs[["spatial_cluster"]] == thisSpatialCluster]

# this is because if some genes are 0 , this will cause error in simulation
keep_feature <- rowSums(counts(sce_thiscelltype) > 0) > 0
sce_thiscelltype_f <- sce_thiscelltype[keep_feature, ]
keep_feature <- colSums(input_thiscelltype$layers[["counts"]] > 0) > 0
input_thiscelltype_f <- input_thiscelltype[, keep_feature]

best_matches_UMI <- BestMatchParams(
tech = "UMI",
counts = as.matrix(counts(sce_thiscelltype_f)),
counts = as.matrix(t(input_thiscelltype_f$layers[["counts"]])),
plotfilename = "best_params.umi.qqplot",
n_optimal = 1
)

sim_thiscelltype <- SimulateTrueCounts(
ncells_total = dim(sce_thiscelltype)[2],
ngenes = dim(sce_thiscelltype)[1],
ncells_total = dim(input_thiscelltype)[1],
ngenes = dim(input_thiscelltype)[2],
evf_type = "one.population",
randseed = 1,
Sigma = best_matches_UMI$Sigma[1],
Expand All @@ -60,7 +54,7 @@ for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
mean_hge = best_matches_UMI$mean_hge[1]
)

gene_len <- sample(gene_len_pool, dim(sce_thiscelltype)[1], replace = FALSE)
gene_len <- sample(gene_len_pool, dim(input_thiscelltype)[2], replace = FALSE)
sim_thiscelltype <- True2ObservedCounts(
true_counts = sim_thiscelltype[[1]],
meta_cell = sim_thiscelltype[[3]],
Expand All @@ -73,7 +67,7 @@ for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
)

# tidy up the names
sim_thiscelltype <- SingleCellExperiment(list(counts = sim_thiscelltype$counts))
sim_thiscelltype <- SingleCellExperiment(list(counts = as.matrix(sim_thiscelltype$counts)))
sim_thiscelltype$spatial_cluster <- thisSpatialCluster

# combine the cell types
Expand All @@ -86,24 +80,21 @@ for (thisSpatialCluster in (unique(sce_ordered$spatial_cluster))) {
})
}

colnames(simulated_result) <- colnames(sce_ordered)
rownames(simulated_result) <- rownames(sce_ordered)
colnames(simulated_result) <- rownames(input_ordered$obs)
rownames(simulated_result) <- rownames(input_ordered$var)

simulated_result_order <- sce_ordered
counts(simulated_result_order) <- counts(simulated_result)

simulated_result_order <- simulated_result_order[, match(colnames(sce), colnames(simulated_result_order))]
simulated_result_order <- simulated_result_order[match(rownames(sce), rownames(simulated_result_order)), ]
new_obs <- as.data.frame(simulated_result_order@colData[c("row", "col")])
simulated_result_ordered <- counts(simulated_result)[
match(rownames(counts(simulated_result)), rownames(input_ordered$var)),
match(colnames(counts(simulated_result)), rownames(input_ordered$obs))
]

cat("Generating output\n")

output <- anndata::AnnData(
layers = list(
counts = Matrix::t(counts(simulated_result_order))
counts = Matrix::t(simulated_result_ordered)
),
obs = new_obs,
var = input$var,
obs = input_ordered$obs[c("row", "col")],
var = input_ordered$var,
uns = c(
input$uns,
list(
Expand Down
Loading

0 comments on commit 1d1ffcc

Please sign in to comment.