diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..91114bf
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,2 @@
+^.*\.Rproj$
+^\.Rproj\.user$
diff --git a/.gitignore b/.gitignore
index d54dfbc..6020180 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 .nfs*
 *.png
 Rcheck.txt
+.Rproj.user
diff --git a/R/Color_by_Genes_Automatic.R b/R/Color_by_Genes_Automatic.R
new file mode 100644
index 0000000..d97aa01
--- /dev/null
+++ b/R/Color_by_Genes_Automatic.R
@@ -0,0 +1,233 @@
+# This code comes from NIDAP 'Color by Genes Automatic [scRNA-seq][CCBR]' code template
+# Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.d71ed4e6-a25d-4f66-a186-27c00a50a703
+# Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.8101be48-85a5-40ec-8fa8-9fe28e5d31cb
+
+#' @title Create gene expression annotated dimension plots based on user inputted table
+#' @description Returns a panel of dimension plots colored by individual gene expression
+#' @details Takes in a list of genes inputted by the user, displays gene expression information on tsne, umap, or pca
+#' 
+#' @param SO Seurat-class object
+#' @param samples_to_include List of samples to subset the data by
+#' @param samples_to_display List of samples to depict on dimension plot, samples not in the list would be colored gray in the background
+#' @param marker_list Table of marker genes for each celltype (column names of the table), append "_prot" or "_neg" for proteins or negative markers
+#' @param cells_of_interest Vector of celltypes from geneset_dataframe to screen for
+#' @param protein_presence Set to TRUE if there are CITE-seq markers in geneset_dataframe
+#' @param assay Name of the assay to extract gene expression data from
+#' @param reduction_type Choose among tsne, umap, and pca
+#' @param point_transparency Set to lower value for more see through points on dimension plot
+#' @param point_shape Change the shape of points for between visualization
+#' @param number_of_rows Set the number of rows to be displayed on the compiled gene expression dimension plot
+#' @param doCiteSeq Set to TRUE if using Cite-seq data
+#' 
+#' @import Seurat 
+#' @import tidyverse
+#' @import gridExtra
+#' @import ggpubr
+#' @import ggplot2
+#'   
+#' @export
+
+#' @return compiled dimension plots of markers, in the same layout as the user-inputted marker table
+
+color_by_genes <- function(SO, 
+                             samples_to_include,
+                             samples_to_display,
+                             marker_list,
+                             cells_of_interest,
+                             protein_presence = FALSE,
+                             assay = "SCT",
+                             reduction_type = "umap",
+                             point_transparency = 0.5,
+                             point_shape = 16,
+                             number_of_rows = 0,
+                             doCiteSeq = FALSE){
+    
+  ## --------- ##
+  ## Functions ##
+  ## --------- ##
+  
+  plotMarkers <- function(Markers){
+    if (is.na(Markers) == TRUE) {
+      g <- ggplot() + theme_void()
+      return(g)
+    } else {
+      Markers.mat=SO.sub[[assay]]@scale.data[Markers,]
+      Markers.quant=quantile(Markers.mat[Markers.mat>1],probs=c(.1,.5,.90))
+      Markers.mat[Markers.mat>Markers.quant[3]]=Markers.quant[3]
+      Markers.mat[Markers.mat<Markers.quant[1]]=0
+      
+      if (!(doCiteSeq)) {
+        if(reduction_type == "tsne"){
+          p1 <- DimPlot(SO.sub, reduction = "tsne", group.by = "ident")
+          clusmat=data.frame(umap1=p1$data$tSNE_1,umap2=p1$data$tSNE_2, Markers=Markers.mat, ident = as.factor(p1$data$ident))
+        }
+        else if(reduction_type == "umap"){
+          p1 <- DimPlot(SO.sub, reduction = "umap", group.by = "ident")
+          clusmat=data.frame(umap1=p1$data$UMAP_1,umap2=p1$data$UMAP_2, Markers=Markers.mat,ident = as.factor(p1$data$ident))
+        }
+        else{
+          p1 <- DimPlot(SO.sub, reduction = "pca", group.by = "ident")
+          clusmat=data.frame(umap1=p1$data$PC_1,umap2=p1$data$PC_2, Markers=Markers.mat,ident = as.factor(p1$data$ident))
+        } #if CITEseq is chosen then:
+      } else {
+        if(reduction_type == "tsne"){
+          p1 <- DimPlot(SO.sub, reduction = "protein_tsne", group.by = "ident")
+          clusmat=data.frame(umap1=p1$data$protein_tsne_1,umap2=p1$data$protein_tsne_2, Markers=Markers.mat,ident = as.factor(p1$data$ident))
+        }
+        else if(reduction_type == "umap"){
+          p1 <- DimPlot(SO.sub, reduction = "protein_umap", group.by = "ident")
+          clusmat=data.frame(umap1=p1$data$protein_umap_1,umap2=p1$data$protein_umap_2, Markers=Markers.mat,ident = as.factor(p1$data$ident))
+        }
+        else{
+          p1 <- DimPlot(SO.sub, reduction = "protein_pca", group.by = "ident")
+          clusmat=data.frame(umap1=p1$data$protein_pca_1,umap2=p1$data$protein_pca_2, Markers=Markers.mat,ident = as.factor(p1$data$ident))
+        }
+      }
+      
+      # Samples caption
+      samples_displayed_message <- paste(samples_to_display, sep = "", collapse = "\n")
+      final_caption <- paste("Samples Displayed: ", samples_displayed_message, sep = "", collapse = "\n")
+      
+      clusmat <- mutate(clusmat, sample_Markers = clusmat$Markers * grepl(paste(samples_to_display, collapse = "|"), clusmat$ident))
+      
+      clusmat %>% dplyr::arrange(sample_Markers) -> clusmat
+      if (grepl("_neg",Markers) == TRUE){
+        
+        clusmat %>% dplyr::arrange(desc(sample_Markers)) -> clusmat
+        g <- ggplot(clusmat, aes(x=umap1, y=umap2, group=ident)) +
+          theme_bw() +
+          theme(legend.title=element_blank()) +
+          ggtitle(Markers) +
+          geom_point(aes(color=sample_Markers, shape=ident),alpha=point_transparency,shape=point_shape, size=1) +
+          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+                panel.background = element_blank(),legend.text=element_text(size=rel(0.5)) )+
+          scale_color_gradient(limits = c(0, Markers.quant[3]),low = "lightgrey", high = "red") +
+          xlab("umap-1") + ylab("umap-2")
+        return(g)
+      } else {
+        clusmat %>% dplyr::arrange(sample_Markers) -> clusmat
+        g <- ggplot(clusmat, aes(x=umap1, y=umap2, group=ident)) +
+          theme_bw() +
+          theme(legend.title=element_blank()) +
+          ggtitle(Markers) +
+          geom_point(aes(color=sample_Markers, shape=ident),alpha=point_transparency,shape=point_shape, size=1) +
+          theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
+                panel.background = element_blank(),legend.text=element_text(size=rel(0.5)) )+
+          scale_color_gradient(limits = c(0, Markers.quant[3]),low = "lightgrey", high = "red") +
+          xlab("umap-1") + ylab("umap-2")
+        return(g)}
+    }
+  }
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  if (length(samples_to_include) == 0) {
+    samples_to_include = unique(SO@meta.data$sample_name)
+  }
+  
+  if("active.ident" %in% slotNames(SO)){
+    sample_name = as.factor(SO@meta.data$orig.ident)
+    names(sample_name)=names(SO@active.ident)
+    SO@active.ident <- as.factor(vector())
+    SO@active.ident <- sample_name
+    SO.sub = subset(SO, ident = samples_to_include)
+  } else {
+    sample_name = as.factor(SO@meta.data$orig.ident)
+    names(sample_name)=names(SO@active.ident)
+    SO@active.ident <- as.factor(vector())
+    SO@active.ident <- sample_name
+    SO.sub = subset(SO, ident = samples_to_include)
+  }
+  
+  marker_list <- marker_list[cells_of_interest]
+  
+  # Remove columns with all missing values
+  Present_Markers_ls <- list()
+  
+  for (celltype in colnames(marker_list)) {
+    print(names(marker_list[celltype]))
+    present=lapply(marker_list[[celltype]], function(x) x %in% rownames(SO.sub$SCT@scale.data)) 
+    absentgenes = unlist(marker_list[[celltype]])[present==FALSE]
+    presentgenes = unlist(marker_list[[celltype]])[present==TRUE]
+    print(paste0("Genes not present: ",paste0(absentgenes,collapse=",")))
+    print(paste0("Genes present: ",paste0(presentgenes,collapse=",")))
+    
+    if(length(presentgenes) == 0){
+      print(paste0(names(marker_list[celltype]), " genes were not found in SO and will not be analyzed"))
+    } else {Present_Markers_ls[[celltype]] <- presentgenes}
+  }
+  
+  # Padd processed list containing only the present genes
+  padded_list <- lapply(Present_Markers_ls, `length<-`, max(lengths(Present_Markers_ls)))
+  Markers_from_list <- as.data.frame.matrix(do.call(cbind, padded_list))
+  
+  # Recognize any markers designated as proteins and insert protein expression data into slot where plots are created
+  if (protein_presence){
+    protein_markers <- Markers_from_list[grepl("_prot",Markers_from_list)]
+    
+    protein_orig_markers <- gsub("_prot.*","",protein_markers)
+    
+    protein_markers_name <- paste(protein_orig_markers,
+                                  "_prot", sep = "")
+    
+    i = 0
+    protein_array <- list()
+    for (i in seq_along(protein_orig_markers)){
+      protein_array[[i]] <- SO.sub@assays$Protein[protein_orig_markers[i],]
+      rownames(protein_array[[i]]) <- protein_markers_name[i]
+    }
+    protein_array_comp <- do.call(rbind,protein_array)
+    SO.sub@assays$SCT@scale.data <- rbind(SO.sub@assays$SCT@scale.data,protein_array_comp)
+  }
+  
+  # Add negative/low identifiers
+  neg_markers_names <- Markers_from_list[grepl("_neg",Markers_from_list)]
+  orig_markers <- gsub("_neg.*","",neg_markers_names)
+  
+  # Append neg_markers_names to rownames of SO.sub
+  neg_markers_list <- list()
+  
+  # Calculate adjusted expression for negative markers
+  for (i in seq_along(orig_markers)){
+    if (orig_markers[i] %in% rownames(SO.sub@assays$SCT@scale.data)){
+      # Format the data so that it can rbinded with SO$SCT@scale.data
+      neg_markers_list[[i]] <- t(matrix(max(SO.sub@assays$SCT@scale.data[orig_markers[i],]) - SO.sub@assays$SCT@scale.data[orig_markers[i],]))
+      row.names(neg_markers_list[[i]]) <- neg_markers_names[i]
+      colnames(neg_markers_list[[i]]) <- colnames(SO.sub@assays$SCT@scale.data)
+      
+      # Append new Negative/low marker (w Expression Count) to SO slot
+      SO.sub@assays$SCT@scale.data <- rbind(SO.sub@assays$SCT@scale.data, neg_markers_list[[i]]) 
+    } else {
+      print(paste(orig_markers[i], " is not found in Seurat, cannot calculate negative expression", sep = ""))
+    }} 
+  
+  Markers_present = unlist(Markers_from_list)
+
+  if(!length(Markers_present)>0){
+    print("No Markers found in dataset")
+    return(NULL)
+  }
+  
+  # Create list for storing color by gene plots of each celltype column
+  gg_storage <- list()
+  
+  for (cell in colnames(Markers_from_list)){
+    
+    title <- cell
+    
+    markers_to_analyze <- as.character(Markers_from_list[[cell]])
+    
+    grob <- lapply(markers_to_analyze,function(x) plotMarkers(x))
+    gg_storage[[cell]] <- gridExtra::arrangeGrob(grobs=grob,ncol = 1,newpage=F, as.table = F, top = text_grob(title,size = 15, face = "bold"))
+    
+  }
+  
+  #pdf("Color_by_Genes.pdf", 12, 15)
+  final_figures <- do.call(grid.arrange, c(gg_storage, ncol = ncol(Markers_from_list)))
+  #dev.off()
+  
+  return(final_figures)
+}
+  
\ No newline at end of file
diff --git a/R/Harmony_Batch_Correction.R b/R/Harmony_Batch_Correction.R
new file mode 100644
index 0000000..4961de6
--- /dev/null
+++ b/R/Harmony_Batch_Correction.R
@@ -0,0 +1,141 @@
+# This code comes from NIDAP 'Harmony Batch Correction [scRNA-seq][CCBR]' code template
+# Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.a097334e-21ac-466c-b467-c761b2c25f02
+# Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.facd9ed0-90fc-4c55-bbb8-0153f06858d4
+
+#' @title Performs batch correction on Seurat-class object
+#' @description Returns a Seurat-class object with adjusted cell embeddings and gene expression data to account for variations in sample collection
+#' @details Takes in a list of genes inputted by the user, displays gene expression information in particular slot-assay with (optional) outliers removed
+#' 
+#' @param seurat_object Seurat-class object
+#' @param variable_features Number of most variable genes to subset the gene expression data by
+#' @param genes_to_add Add genes that might not be found among variably expressed genes
+#' @param group.by.vars which variable should be accountted for when running batch correction
+
+#' @import Seurat 
+#' @import harmony
+#' @import gridExtra
+#' @import RColorBrewer
+#' @import ggplot2
+#'   
+#' @export
+
+#' @return Seurat-class Object with Harmony-adjusted gene expression (SCT slot) and tSNE cell embedding
+
+
+harmony_batch_correct <- function(so, 
+                                  variable_features = 2000, 
+                                  genes_to_add,
+                                  group.by.vars) {
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  seur.SCT <- so@assays$SCT@scale.data
+  seur.SCT[1:10, 1:10]
+  
+  genes_of_interest <- genes_to_add[genes_to_add %in% rownames(so@assays$SCT@scale.data)]
+  
+  # Add more genes to analyze
+  VariableFeatures(so) <- c(VariableFeatures(so), genes_of_interest) 
+  mvf <- VariableFeatures(so)[1:(variable_features + length(genes_of_interest))] 
+  print(dim(so@assays$SCT@scale.data))
+  seur.SCT <- seur.SCT[mvf,]
+  seur.SCT <- t(seur.SCT) 
+  # need the original loadings and embeddings to compare with my SVD
+  seur.loads <- so@reductions$pca@feature.loadings
+  seur.pca <- so@reductions$pca@cell.embeddings
+  sm.pca <- seur.pca[1:10, 1:10]
+  sm.seur.ldngs <- seur.loads[1:10, 1:10]
+  
+  # SVD on scaled counts
+  
+  variable_features <- length(mvf)
+  Sys.time()
+  pppca <- svd(seur.SCT) 
+  Sys.time() 
+  ppembed <- pppca$u %*% diag(pppca$d)
+  pcnames <- vector(mode = "character")
+  for (i in 1:dim(ppembed)[2])pcnames[i] <- paste("PC", i, sep = "_")
+  colnames(ppembed) <- pcnames
+  rownames(ppembed) <- rownames(seur.SCT)
+  sm.ppembed <- ppembed[1:10, 1:10]
+  ppldngs <- pppca$v
+  colnames(ppldngs) <- pcnames
+  rownames(ppldngs) <- mvf 
+  sm.ppldngs <- ppldngs[1:10, 1:10]
+  ppembed[1:10, 1:10]
+  ppldngs[1:10, 1:10]
+  
+  so@reductions$pca@cell.embeddings <- ppembed
+  so@reductions$pca@feature.loadings <- ppldngs
+  so@reductions$pca@stdev <- pppca$d
+  print(Sys.time())
+  
+  so <- RunHarmony(so, group.by.vars,
+                   do_pca=FALSE,
+                   assay.use = "SCT",
+                   plot_convergence = TRUE,
+                   return_object=TRUE)
+  
+  head(so@reductions$harmony@cell.embeddings)
+  head(so@reductions$harmony@feature.loadings)
+  
+  so <- RunUMAP(so, reduction = "harmony",dims=1:4)
+  so <- RunTSNE(so, reduction = "harmony",dims=1:4)
+  
+  sdat <- data.frame(as.vector(so@reductions$tsne@cell.embeddings[,1]),
+                     as.vector(so@reductions$tsne@cell.embeddings[,2]),
+                     so@meta.data[eval(parse(text = "group.by.vars"))])
+  names(sdat) <- c("TSNE1","TSNE2","ident")
+  
+  n <- 2e3
+  set.seed(10)
+  ourColorSpace <- colorspace::RGB(runif(n), runif(n), runif(n))
+  ourColorSpace <- as(ourColorSpace, "LAB")
+  
+  
+  distinctColorPalette <-function(k=1) {
+    currentColorSpace <- ourColorSpace@coords
+    # Set iter.max to 20 to avoid convergence warnings.
+    set.seed(1)
+    km <- kmeans(currentColorSpace, k, iter.max=20)
+    colors <- unname(hex(LAB(km$centers)))
+    return(colors)
+  }   
+  
+  n <- 60
+  qual_col_pals = brewer.pal.info[brewer.pal.info$category == 'qual',]
+  qual_col_pals = qual_col_pals[c(7,6,2,1,8,3,4,5),]
+  cols = unlist(mapply(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals)))
+  
+  g1 <- ggplot(sdat, aes(x=TSNE1, y=TSNE2)) +
+    theme_bw() +
+    theme(legend.title=element_blank()) +
+    geom_point(aes(colour=ident),size=1) +
+    scale_color_manual(values=cols) +
+    theme(panel.grid.major = element_blank(), 
+          panel.grid.minor = element_blank(), 
+          legend.position="top",
+          panel.background = element_blank(),
+          legend.text=element_text(size=rel(0.5))) +
+    guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) 
+  
+  solist <- list(so, ppldngs)
+  
+  # Calculate adjusted gene expression from embeddings
+  so <- solist[[1]]
+  ppldngs <- solist[[2]]
+  seur.SCT <- t(so@assays$SCT@scale.data)
+  harm.embeds <- so@reductions$harmony@cell.embeddings
+  #print(dim(harm.embeds))
+  harm.lvl.backcalc <- harm.embeds %*% t(ppldngs)
+  
+  sm.harmexpr <- harm.lvl.backcalc[1:10, 1:4]
+  sm.comp.scld <- (seur.SCT[rownames(sm.harmexpr), colnames(sm.harmexpr)])
+
+  # Convert to ggplot that includes color by samples for higher resolution of batch correction, use for loop to screen for different genes
+  so@assays$SCT@scale.data <- t(harm.lvl.backcalc)
+
+  return(so)
+}
diff --git a/R/ModuleScore.R b/R/ModuleScore.R
new file mode 100644
index 0000000..f52f46c
--- /dev/null
+++ b/R/ModuleScore.R
@@ -0,0 +1,356 @@
+  # This code comes from NIDAP 'ModuleScore [scRNA-seq][CCBR]' code template
+  # Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.10cf059e-0bd0-4a1c-9a3a-65b8688dab23
+  # Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.1d5957fa-ef90-4473-b627-989b9620278f
+
+  #' @title Calculate average gene expression (subtracted by aggregated control feature set) and label cells according to ModuleScores
+  #' @description Returns Seurat-class object with metadata containing ModuleScores and Likely_CellType calls
+  #' @details Analyzed features are binned based on averaged expression; control features are randomly selected from each bin.
+  #' 
+  #' @param SO Seurat-class object
+  #' @param sample_names List of samples to subset the data by
+  #' @param sample_to_display List of samples to depict on dimension plot, samples not in the list would be colored gray in the background
+  #' @param geneset_dataframe Table of marker genes for each celltype (column names of the table), append "_prot" or "_neg" for proteins or negative markers
+  #' @param proteins_presence Set to TRUE if there are CITE-seq markers in geneset_dataframe
+  #' @param celltypes_to_analyze Vector of celltypes from geneset_dataframe to screen for
+  #' @param manual_threshold Specify bimodal thresholds for cell classification, should be of the same length as celltypes_to_analyze vector
+  #' @param general_class Base population of cells to classify
+  #' @param multi_level_class Toggle to TRUE if there are subpopulations of cells you want to screen for
+  #' @param levels_dataframe Table of subpopulation levels (column names of the table) and parent-child information (e.g. Tcells-CD4, Tcells-CD8)
+  #' @param reduction Choose among tsne, umap, and pca
+  #' @param nbins Number of bins for storing control features and analyzing average expression
+  #' @param gradient_density_font_size Set size of axis labels on gradient density plot of ModuleScore distribution
+  #' @param violinplot_font_size Set size of axis labels on violin plot of ModuleScore distribution
+  #' @param step_size Set step size of distribution plots
+  #' @param image_width Set Image width (pixel units)
+  #' @param image_height Set Image height (pixel units)
+  #' @param image_type Choose between png and svg
+  #' 
+  #' @import Seurat
+  #' @import tidyverse
+  #' @import gridExtra
+  #' @import quantmod
+  #' @import grid
+  #' @import data.table
+  #' @import utils
+  #'   
+  #' @export
+
+  #' @return Annotated dimension plot with ModuleScore distribution of cell marker gene, Seurat Object with cell classification metadata
+
+ModuleScore <- function(SO, 
+                         sample_names, 
+                         sample_to_display, 
+                         geneset_dataframe, 
+                         proteins_presence = FALSE, 
+                         celltypes_to_analyze,
+                         manual_threshold = c(0), 
+                         general_class, 
+                         multi_level_class, 
+                         levels_dataframe, 
+                         reduction = "tsne",
+                         nbins = 24,
+                         gradient_density_font_size = 6,
+                         violinplot_font_size = 6,
+                         step_size = 0.1
+                         ){
+
+  ## ---------------- ##
+  ## Helper Functions ##
+  ## ---------------- ##
+  
+  Predict_Cell_from_ModScore <- function(ModScore_Metadata,manual_threshold,rejection){
+    
+    thres_ls <- list()
+    for (i in 1:ncol(ModScore_Metadata)){
+      thres_ls[[i]]<- rep(manual_threshold[i],nrow(ModScore_Metadata))
+    }
+    thres_df <- data.frame(matrix(unlist(thres_ls),nrow = nrow(ModScore_Metadata)))
+    
+    thres_filter <- ModScore_Metadata > thres_df
+    ModScore_Metadata_post_thres_filter <- ModScore_Metadata * thres_filter
+    
+    # Find column number with highest modscore
+    max_col_vector <- max.col(ModScore_Metadata_post_thres_filter)
+    
+    # If a row contains all zeroes, they will be labeled with unknown
+    all_zero_filter <- as.integer(!apply(ModScore_Metadata_post_thres_filter, 1, function(find_zero_rows) all(find_zero_rows == 0)))
+    
+    # Final filtering: 
+    final_filter <- (max_col_vector * all_zero_filter) + 1
+    
+    # Original names appended to "unknown" classification for cells with ModScores below threshold
+    appended_names <- c(rejection, names(ModScore_Metadata))
+    
+    # Added the names into a Likely_CellType Column
+    dupl_data <- ModScore_Metadata
+    dupl_data[,"Likely_CellType"] <- appended_names[final_filter]
+    return(dupl_data)
+  }
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  geneset_for_module_scores <- unlist(geneset_dataframe)
+  
+  # Create a Barcode column if none is detected - required for matching cell calls in downstream steps
+  if (!"Barcode" %in% colnames(SO@meta.data)){
+    SO@meta.data$Barcode <- rownames(SO@meta.data)
+  }
+  
+  if (length(sample_names) == 0) {
+    sample_names = unique(SO@meta.data$sample_name)
+  }
+  
+  colnames(SO@meta.data) <- gsub("orig_ident","orig.ident",colnames(SO@meta.data))
+  
+  if("active.ident" %in% slotNames(SO)){
+    sample_name = as.factor(SO@meta.data$orig.ident)
+    names(sample_name)=names(SO@active.ident)
+    SO@active.ident <- as.factor(vector())
+    SO@active.ident <- sample_name
+    SO.sub = subset(SO, ident = sample_names)
+  } else {
+    sample_name = as.factor(SO@meta.data$orig.ident)
+    names(sample_name)=names(SO@active.ident)
+    SO@active.ident <- as.factor(vector())
+    SO@active.ident <- sample_name
+    SO.sub = subset(SO, ident = sample_names)
+  } 
+  
+  # Remove original unprocessed SO
+  rm(SO)
+  
+  # Adding protein marker expression
+  if (proteins_presence){
+    protein_markers <- geneset_for_module_scores[grepl("_prot",geneset_for_module_scores)]
+    
+    protein_orig_markers <- gsub("_prot.*","",protein_markers)
+    
+    protein_markers_name <- paste(protein_orig_markers,
+                                  "_prot", sep = "")
+    
+    prot_indx = 0
+    protein_array <- list()
+    for (prot_indx in seq_along(protein_orig_markers)){
+      protein_array[[prot_indx]] <- SO.sub@assays$Protein[protein_orig_markers[prot_indx],]
+      rownames(protein_array[[prot_indx]]) <- protein_markers_name[prot_indx]
+    }
+    protein_array_comp <- do.call(rbind,protein_array)
+    SO.sub@assays$SCT@data <- rbind(SO.sub@assays$SCT@data,protein_array_comp)
+  }
+  
+  # Recognize any negative markers in marker list
+  neg_markers_names <- geneset_for_module_scores[grepl("_neg",geneset_for_module_scores)]
+  orig_markers <- gsub("_neg.*","",neg_markers_names)
+  
+  # Retrieve markers found in counts data
+  orig_markers <- orig_markers[orig_markers %in% rownames(SO.sub@assays$SCT@data)]
+  
+  neg_markers_list <- list()
+  
+  # Calculate adjusted expression for negative markers
+  for (neg_indx in seq_along(orig_markers)){
+    
+    # Format the data so that it can rbinded with SO$SCT@scale.data
+    neg_markers_list[[neg_indx]] <- t(matrix(max(SO.sub@assays$SCT@data[orig_markers[neg_indx],]) - SO.sub@assays$SCT@data[orig_markers[neg_indx],]))
+    row.names(neg_markers_list[[neg_indx]]) <- neg_markers_names[neg_indx]
+    colnames(neg_markers_list[[neg_indx]]) <- colnames(SO.sub@assays$SCT@data)
+    
+    # Append new Negative/low marker (w Expression Count) to SO slot
+    SO.sub@assays$SCT@data <- rbind(SO.sub@assays$SCT@data, neg_markers_list[[neg_indx]]) 
+  }
+  
+  # Retrive markers from list
+  marker = select(geneset_dataframe, celltypes_to_analyze)
+  marker.list = as.list(marker)
+  
+  # Error checking and messages for when threshold vector is zero or of non-matching length to number of celltypes to analyze
+  if (sum(unlist(marker.list) %in% rownames(SO.sub@assays$SCT@data)) == 0){
+    stop("No genes from list was found in data")
+  }
+  
+  if (length(manual_threshold) != length(celltypes_to_analyze)){
+    if (sum(manual_threshold) == 0){
+      manual_threshold <- rep(0, length(celltypes_to_analyze))
+      print("Manual threshold set to zero - outputing preliminary data")
+    } else {
+      stop("Manual threshold length does not match number of celltypes to analyze - please check manual thresholds")
+    }}
+  
+  names(manual_threshold) <- celltypes_to_analyze
+  
+  # Check if all markers for particular celltype is present, if none are present - corresponding celltype will not be analyzed
+  figures <- list()
+  exclude_cells <- c()
+  
+  h = 0
+  j = 1
+  
+  for (h in seq_along(marker.list)) {
+    print(names(marker.list[h]))
+    present=lapply(marker.list[[h]], function(x) x %in% rownames(SO.sub)) # apply function(x) x %in% rownames(SO.sub) to each element of marker.list
+    absentgenes = unlist(marker.list[[h]])[present==FALSE];     absentgenes=absentgenes[is.na(absentgenes)==F]
+    presentgenes = unlist(marker.list[[h]])[present==TRUE];presentgenes=presentgenes[is.na(presentgenes)==F]
+    print(paste0("Genes not present: ",paste0(absentgenes,collapse=",")))
+    print(paste0("Genes present: ",paste0(presentgenes,collapse=",")))
+    
+    if(length(presentgenes) == 0){
+      print(paste0(names(marker.list[h]), " genes were not found in SO and will not be analyzed"))
+      exclude_cells[j] <- h
+      j = j + 1
+    }}  
+  
+  if (length(exclude_cells) > 0){
+    marker.list <- marker.list[-exclude_cells]} else {
+      marker.list <- marker.list
+    }  
+  
+  # Calculate aggregate expression of markers with Seurat's Module Score function
+  for (i in seq_along(marker.list)) { 
+    SO.sub=AddModuleScore(SO.sub,marker.list[i],name = names(marker.list[i]),
+                          nbin = nbins)
+    
+    m = 0
+    
+    m = paste0(names(marker.list[i]),"1")
+    SO.sub@meta.data[[m]] <- scales::rescale(SO.sub@meta.data[[m]], to=c(0,1))
+    
+    clusid = SO.sub@meta.data[[m]] 
+    
+    # Calculate density data for ModScore vs Number of cells
+    d <- density(clusid)
+    
+    # Calculate dimension reduction
+    if(reduction=="tsne"){
+      p1 <- DimPlot(SO.sub, reduction = "tsne", group.by = "ident") 
+    } else if(reduction=="umap"){
+      p1 <- DimPlot(SO.sub, reduction = "umap", group.by = "ident")
+    } else { 
+      p1 <- DimPlot(SO.sub, reduction = "pca", group.by = "ident")
+    }
+    
+    if(reduction=="tsne"){
+      clusmat=data.frame(ident=p1$data$ident,umap1=p1$data$tSNE_1,umap2=p1$data$tSNE_2, clusid=as.numeric(SO.sub@meta.data[[m]]))
+    } else if(reduction=="umap"){
+      clusmat=data.frame(ident=p1$data$ident,umap1=p1$data$UMAP_1,umap2=p1$data$UMAP_2, clusid=as.numeric(SO.sub@meta.data[[m]]))
+    } else { 
+      clusmat=data.frame(ident=p1$data$ident,umap1=p1$data$PC_1,umap2=p1$data$PC_2, clusid=as.numeric(SO.sub@meta.data[[m]]))
+    }
+    clusmat <- mutate(clusmat, sample_clusid = clusmat$clusid * grepl(paste(sample_to_display, collapse = "|"), clusmat$ident))
+    
+    clusmat %>% group_by(clusid) %>% summarise(umap1.mean=mean(umap1), umap2.mean=mean(umap2)) -> umap.pos
+    title=as.character(m)
+    clusmat %>% dplyr::arrange(clusid) -> clusmat
+    
+    # Plot data onto colored dimension reduction plot
+    clusid.df <- data.frame(id=SO.sub@meta.data$orig.ident,ModuleScore=SO.sub@meta.data[[m]])
+    
+    g <- ggplot(clusmat, aes(x=umap1, y=umap2)) +
+      theme_bw() +
+      theme(legend.title=element_blank()) +
+      geom_point(aes(colour=sample_clusid),alpha=0.5,shape = 20,size=1) +
+      scale_color_gradientn(colours = c("blue4","lightgrey", "red"), values = scales::rescale(c(0,manual_threshold[i]/2,manual_threshold[i],(manual_threshold[i]+1)/2,1), limits = c(0, 1))) + guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) +
+      theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
+      xlab("tsne-1") + ylab("tsne-2")
+    
+    g1 = RidgePlot(SO.sub,features=m,group.by="orig.ident") + theme(legend.position = "none", title = element_blank(), axis.text.x = element_text(size = gradient_density_font_size)) + geom_vline(xintercept = manual_threshold[i], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0,1,step_size))
+    
+    # Violin Plot
+    g2 = ggplot(clusid.df,aes(x=id,y=ModuleScore)) + geom_violin(aes(fill=id)) +  theme_classic() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),legend.title = element_blank(), panel.background = element_blank(), axis.text.x=element_blank(),legend.text=element_text(size=rel(0.8)),legend.position="top", axis.text.y = element_text(size = violinplot_font_size)) + guides(colour = guide_legend(override.aes = list(size=5, alpha = 1))) + geom_hline(yintercept = manual_threshold[i], linetype = "dashed", color = "red3") + scale_y_continuous(breaks = seq(0,1,step_size))
+    
+    # Color gradient density plot
+    g3 = ggplot(data.frame(x = d$x, y = d$y), aes(x, y)) + xlab("ModuleScore") + ylab("Density") + geom_line() + 
+      geom_segment(aes(xend = d$x, yend = 0, colour = x)) + scale_y_log10() +
+      scale_color_gradientn(colours = c("blue4","lightgrey", "red"), values = scales::rescale(c(0,manual_threshold[i]/2,manual_threshold[i],(manual_threshold[i]+1)/2,1), limits = c(0, 1))) + geom_vline(xintercept = manual_threshold[i], linetype = "dashed", color = "red3") + geom_vline(xintercept = manual_threshold[i], linetype = "dashed", color = "red3") + scale_x_continuous(breaks = seq(0,1,step_size)) + theme(legend.title = element_blank(), axis.text.x = element_text(size = 6))
+    
+    # Set title for grid.arranged final figure
+    figures[[i]] = grid.arrange(g,g1,g2,g3, ncol=2, top=textGrob(names(marker.list[i]), gp = gpar(fontsize = 14, fontface = "bold")))
+    }
+  
+  # Get rid of "1" at the end of MS columns
+  colnames(SO.sub@meta.data)[colnames(SO.sub@meta.data) %in% paste0(names(marker.list),1)] <- names(marker.list)
+  
+  # Classification of general cell class
+  
+  # Analyze and plot only the class of cells found in the metadata
+  general_class <- general_class[general_class %in% colnames(SO.sub@meta.data)]
+  
+  # Subset the columns of the metadata containing module scores only
+  SO_Trunc_Metadata_General <- SO.sub@meta.data[general_class]
+  
+  General_thres_vec <- manual_threshold[general_class]
+  
+  # See if elements in each ModScore column exceeds CellType threshold, set elements below threshold to zero. Keep values of elements above threshold
+  storage_list_MS_calls <- list()
+  
+  Calls_output <- Predict_Cell_from_ModScore(SO_Trunc_Metadata_General,General_thres_vec,rejection = "unknown")
+  Calls_output$Barcode <- rownames(Calls_output)
+  
+  if (multi_level_class){   
+    
+    for (k in 1:ncol(levels_dataframe)){ 
+      
+      # Initialize list for temporarily keeping results from subpopulation calls
+      Sub_class_calls <- list()
+      
+      ## Subclass Identification
+      # Remove any NAs from comparisons
+      Sub_class_storage <- levels_dataframe[[k]][!is.na(levels_dataframe[[k]])]
+      
+      parent_class <- unique(gsub("(.*)-(.*)","\\1",Sub_class_storage))
+      
+      for (parent in parent_class){
+        Sub_class <- Sub_class_storage[grepl(parent,Sub_class_storage)]
+        children_class <- gsub("(.*)-(.*)","\\2",Sub_class)
+        
+        # Subset out cells predicted to be parents. Child cells will be screened from this population
+        parents <- Calls_output$Barcode[Calls_output$Likely_CellType == parent]
+        SO_Trunc_Metadata_parents <- SO.sub@meta.data[parents,] %>% select(children_class)
+        
+        # Stores a new density plot containing MS information of children cells within parent population
+        for (children in children_class){
+          
+          plot_title <- paste("Density plot for",children,"Module Scores within", parent,"population", sep = " ")
+          adjusted_density_plot <- ggplot(SO_Trunc_Metadata_parents, aes_string(x = children)) + geom_density() + ggtitle(plot_title) +                  geom_vline(xintercept = manual_threshold[children], linetype = "dashed", color = "red3")
+          
+          figures[[length(figures) + 1]] <- adjusted_density_plot
+        }
+        
+        # Create output table without information of parent population, will be the table with updated cell calls will be reinserted
+        SO_Trunc_Metadata_no_parents <- Calls_output[!Calls_output$Likely_CellType == parent,]
+        non_parents <- rownames(SO_Trunc_Metadata_no_parents)
+        
+        gen_annot_table <- data.frame(cells = non_parents, identity = SO_Trunc_Metadata_no_parents$Likely_CellType)
+        
+        # Repeat Module Score Comparison and Cell Prediction with Child Subset:
+        children_thres_vec <- manual_threshold[children_class]
+        
+        Sub_class_calls[[match(parent,parent_class)]] <- Predict_Cell_from_ModScore(SO_Trunc_Metadata_parents,children_thres_vec,rejection = parent) %>%  select(Likely_CellType)
+      }
+      
+      # Reappend subclassification results back to general output for higher level classification
+      Sub_class_calls <- do.call(rbind,Sub_class_calls)
+      Sub_class_calls$Barcode <- rownames(Sub_class_calls)
+      
+      # Continuously update Likely_CellType column in Calls Output with new results from each level subclassification(s)
+      Calls_output$Likely_CellType_updated <- Sub_class_calls$Likely_CellType[match(Calls_output$Barcode,Sub_class_calls$Barcode)]
+      
+      Calls_output <- Calls_output %>% mutate(Likely_CellType = case_when(
+        is.na(Likely_CellType_updated) ~ Likely_CellType,
+        TRUE ~ Likely_CellType_updated
+      ))
+      
+      # Remove Likely_CellType_updated
+      Calls_output$Likely_CellType_updated <- NULL
+    }}
+  
+  ## Updating CellType(s) in metadata with subclass calls
+  SO.sub@meta.data$Likely_CellType <- Calls_output$Likely_CellType[match(SO.sub@meta.data$Barcode,Calls_output$Barcode)]
+  
+  modscore_res <- list(Arranged_figures <- do.call("grid.arrange", c(figures)),
+                       Seurat_Object = SO.sub)
+  
+  return(modscore_res)
+ }
+ 
\ No newline at end of file
diff --git a/R/Pseudobulk_DEG.R b/R/Pseudobulk_DEG.R
new file mode 100644
index 0000000..ddab0ea
--- /dev/null
+++ b/R/Pseudobulk_DEG.R
@@ -0,0 +1,549 @@
+# This code comes from NIDAP 'Pseudobulk DEG [scRNA-seq][CCBR]' code template
+# Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.07c5b878-ada6-4cd8-9f61-e312f8491904
+# Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.422102e6-4d67-4ec9-baf7-e4f1fd78e0a8
+
+#' @title Pseudobulk Differential Gene Expression Analysis
+#' @description Performs Linear Models for Microarray Data on single-cell data 
+#' @details Aggregate gene expression amongst replicates before performing limma regression
+#' 
+#' @param so Seurat-class object
+#' @param contrasts Choose the groups (group1-group2) to compare transcription profiles against, positive Fold Change values indicate upregulation in group1
+#' @param replicate Column of metadata containing replicate information, gene expression will first be aggregated based on this column before differential analysis is ran
+#' @param comparison_level Name of metadata column containing information on the sub-levels you want to divide your data into. 
+#' @param label Name of metadata column containing information on the groups (contrasts) you want to compare.
+#' @param min_num_cells Filters a comparison level based on the minimum number of cells at that level.
+#' @param min_num_reps Filters a comparison level based on the minimum number of replicates at that level.
+#' @param min_num_features Filters out genes if they are expressed in fewer than X number of cells.
+#' @param subdivide_data Toggle to TRUE if you want to make comparisons across different levels within your data.
+#' 
+#' @import Seurat
+#' @import edgeR
+#' @import purrr
+#' @import magrittr
+#' @import tidyverse
+#'   
+#' @export
+
+#' @return data.frame of differentially expressed genes across comparisons
+
+Pseudobulk_DEG <- function(so,
+                               contrasts,
+                               replicate,
+                               comparison_level,
+                               label,
+                               min_num_cells = 3,
+                               min_num_reps = 1,
+                               min_num_features = 0,
+                               subdivide_data = FALSE) {
+  
+  ## ---------------- ##
+  ## Helper Functions ##
+  ## ---------------- ##
+  
+  check_inputs = function(input,
+                          meta = meta,
+                          replicate_col = 'Replicate',
+                          cell_type_col = 'Comparison',
+                          label_col = 'Label') {
+    
+    # extract cell types and label from metadata
+    if ("Seurat" %in% class(input)) {
+      # confirm Seurat is installed
+      if (!requireNamespace("Seurat", quietly = TRUE)) {
+        stop("install \"Seurat\" R package for Augur compatibility with ",
+             "input Seurat object", call. = FALSE)
+      }
+      meta = input@meta.data %>%
+        droplevels()
+      if (!is.null(replicate_col))
+        replicates = as.character(meta[[replicate_col]])
+      if (!is.factor(meta[[label_col]])) {
+        labels = meta[[label_col]]
+      } else {
+        labels = as.character(meta[[label_col]])
+      }
+      cell_types = as.character(meta[[cell_type_col]])
+      expr = Seurat::GetAssayData(input, slot = 'counts')
+    } else if ("cell_data_set" %in% class(input)) {
+      # confirm monocle3 is installed
+      if (!requireNamespace("monocle3", quietly = TRUE)) {
+        stop("install \"monocle3\" R package for Augur compatibility with ",
+             "input monocle3 object", call. = FALSE)
+      }
+      meta = monocle3::pData(input) %>%
+        droplevels() %>%
+        as.data.frame()
+      if (!is.null(replicate_col))
+        replicates = as.character(meta[[replicate_col]])
+      if (!is.factor(meta[[label_col]])) {
+        labels = meta[[label_col]]
+      } else {
+        labels = as.character(meta[[label_col]])
+      }
+      cell_types = as.character(meta[[cell_type_col]])
+      expr = monocle3::exprs(input)
+    } else if ("SingleCellExperiment" %in% class(input)){
+      # confirm SingleCellExperiment is installed
+      if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) {
+        stop("install \"SingleCellExperiment\" R package for Augur ",
+             "compatibility with input SingleCellExperiment object",
+             call. = FALSE)
+      }
+      meta = SummarizedExperiment::colData(input) %>%
+        droplevels() %>%
+        as.data.frame()
+      if (!is.null(replicate_col))
+        replicates = as.character(meta[[replicate_col]])
+      if (!is.factor(meta[[label_col]])) {
+        labels = meta[[label_col]]
+      } else {
+        labels = as.character(meta[[label_col]])
+      }
+      cell_types = as.character(meta[[cell_type_col]])
+      expr = SummarizedExperiment::assay(input)
+    } else {
+      # check if input is sparse matrix or numberic matrix/df
+      valid_input = is(input, 'sparseMatrix') ||
+        is_numeric_matrix(input) ||
+        is_numeric_dataframe(input)
+      if (!valid_input)
+        stop("input must be Seurat, monocle, sparse matrix, numeric matrix, or ",
+             "numeric data frame")
+      if (is.null(meta))
+        stop("input matrix must be accompanied by a metadata table")
+      expr = input
+      if (!is.null(replicate_col))
+        replicates = as.character(meta[[replicate_col]])
+      labels = as.character(meta[[label_col]])
+      cell_types = as.character(meta[[cell_type_col]])
+    }
+    
+    # check dimensions are non-zero
+    if (length(dim(expr)) != 2 || !all(dim(expr) > 0)) {
+      stop("expression matrix has at least one dimension of size zero")
+    }
+    
+    # check dimensions match
+    n_cells1 = nrow(meta)
+    n_cells2 = ncol(expr)
+    if (n_cells1 != n_cells2) {
+      stop("number of cells in metadata (", n_cells1, ") does not match number ",
+           "of cells in expression (", n_cells2, ")")
+    }
+    
+    # check at least two labels
+    if (n_distinct(labels) == 1) {
+      stop("only one label provided: ", unique(labels))
+    }
+    
+    # check for missing labels or cell types
+    if (any(is.na(labels))) {
+      stop("labels contain ", sum(is.na(labels)), "missing values")
+    }
+    if (any(is.na(cell_types))) {
+      stop("cell types contain ", sum(is.na(cell_types)), "missing values")
+    }
+    if (!is.null(replicate_col) && any(is.na(replicates))) {
+      stop("replicates contain ", sum(is.na(replicates)), "missing values")
+    }
+    
+    # check for missing replicates
+    if (!is.null(replicate_col) && is.null(replicates)) {
+      stop("metadata does not contain replicate information")
+    }
+    
+    # remove missing values
+    missing = is.na(expr)
+    if (any(missing)) {
+      stop("matrix contains ", sum(missing), "missing values")
+    }
+    
+    # clean up the meta data
+    if (!is.null(replicate_col)) {
+      meta <- meta %>% as.data.frame() %>%
+        mutate(cell_barcode = rownames(meta),
+               replicate = meta[[replicate_col]],
+               cell_type = meta[[cell_type_col]],
+               label = meta[[label_col]]) %>%
+        mutate_at(vars(replicate, cell_type, label), as.factor)
+    } else {
+      meta <- meta %>% as.data.frame() %>%
+        mutate(cell_barcode = rownames(meta),
+               cell_type = meta[[cell_type_col]],
+               label = meta[[label_col]]) %>%
+        mutate_at(vars(cell_type, label), as.factor)
+    }
+    
+    # make sure meta contains row names and is a data frame
+    rownames(meta) = colnames(expr)
+    meta = as.data.frame(meta)
+    to_return = list(
+      expr = expr,
+      meta = meta
+    )
+    return(to_return)
+  }
+  
+  # Create a pseudobulk matrix
+  # Convert a single-cell expression matrix (i.e., genes by cells)
+  #   to a pseudobulk matrix by summarizing counts within biological replicates
+  
+  to_pseudobulk = function(input, 
+                           meta = NULL, 
+                           replicate_col = 'Replicate',
+                           cell_type_col = 'Comparison',
+                           label_col = 'Label',
+                           min_cells = 3,
+                           min_reps = 2,
+                           min_features = 0,
+                           external = T,
+                           subdivide = T) {
+    if (external) {
+      # first, make sure inputs are correct
+      inputs = check_inputs(
+        input, 
+        meta = meta,
+        replicate_col = replicate_col,
+        cell_type_col = cell_type_col,
+        label_col = label_col)
+      expr = inputs$expr
+      meta = inputs$meta
+    } else {
+      expr = input
+    }
+    
+    # convert to characters
+    meta$replicate <- as.character(meta[,eval(parse(text = "replicate"))])
+    meta$cell_type <- as.character(meta[,eval(parse(text = "comparison_level"))])
+    meta$label <- as.character(meta[,eval(parse(text = "label"))])
+    
+    ## Code from v20 ~ not working when using variable to subset
+    #meta %<>% mutate(replicate = as.character(meta[,replicate]),
+    #                 cell_type = as.character(meta[,comparison_level]),
+    #                 label = as.character(meta[,label]))
+    
+    # keep only cell types with enough cells
+    keep = meta %>%
+      dplyr::count(cell_type, label) %>%
+      group_by(cell_type) %>%
+      filter(all(n >= min_cells)) %>%
+      pull(cell_type) %>%
+      unique()
+    
+    # Need to make rownames = barcode otherwise, downstream matrix multiplication won't work
+    rownames(meta) <- meta$Barcode
+    
+    if (subdivide == TRUE){
+      # process data into gene x replicate x cell_type matrices
+      pseudobulks = keep %>%
+        map( ~ {
+          print(.)
+          cell_type = .
+          meta0 = meta %>% filter(cell_type == !!cell_type)
+          expr0 = expr %>% magrittr::extract(, meta0$cell_barcode)
+          # catch cell types without replicates or conditions
+          if (n_distinct(meta0$label) < 2)
+            return(NA)
+          replicate_counts = distinct(meta0, label, replicate) %>%
+            group_by(label) %>%
+            summarise(replicates = n_distinct(replicate)) %>%
+            pull(replicates)
+          if (any(replicate_counts < min_reps))
+            return(NA)
+          
+          # process data into gene X replicate X cell_type matrice
+          mm = model.matrix(~ 0 + replicate:label, data = meta0)
+          
+          # Test #
+          #mm = model.matrix(~ replicate:label, data = meta0)
+          # Test #
+          
+          mat_mm = expr0 %*% mm
+          
+          ### Original code - start ###
+          #keep_genes = rowSums(mat_mm > 0) > min_features
+          ### Original code - end ###
+          
+          ### New code - start ###
+          keep_genes = rowSums(as.array(mat_mm > 0)) > min_features
+          ### New code - end ###
+          
+          mat_mm = mat_mm[keep_genes, ] %>% as.data.frame()
+          mat_mm %<>% as.data.frame()
+          colnames(mat_mm) = gsub("replicate|label", "", colnames(mat_mm))
+          # drop empty columns
+          keep_samples = colSums(mat_mm) > 0
+          mat_mm %<>% magrittr::extract(, keep_samples)
+          return(mat_mm)
+        }) %>%
+        setNames(keep)
+    } else {
+      keep = "all"
+      pseudobulks = keep %>%
+        map( ~ {
+          print(.)
+          cell_type = .
+          meta0 = meta
+          expr0 = expr
+          # catch cell types without replicates or conditions
+          if (n_distinct(meta0$label) < 2)
+            return(NA)
+          replicate_counts = distinct(meta0, label, replicate) %>%
+            group_by(label) %>%
+            summarise(replicates = n_distinct(replicate)) %>%
+            pull(replicates)
+          if (any(replicate_counts < min_reps))
+            return(NA)
+          
+          # process data into gene X replicate X cell_type matrice
+          mm = model.matrix(~ 0 + replicate:label, data = meta0)
+          
+          # Test #
+          #mm = model.matrix(~ replicate:label, data = meta0)
+          # Test #
+          
+          mat_mm = expr0 %*% mm
+          
+          ### Original code - start ###
+          #keep_genes = rowSums(mat_mm > 0) > min_features
+          ### Original code - end ###
+          
+          ### New code - start ###
+          keep_genes = rowSums(as.array(mat_mm > 0)) > min_features
+          ### New code - end ###
+          
+          mat_mm = mat_mm[keep_genes, ] %>% as.data.frame()
+          mat_mm %<>% as.data.frame()
+          colnames(mat_mm) = gsub("replicate|label", "", colnames(mat_mm))
+          # drop empty columns
+          keep_samples = colSums(mat_mm) > 0
+          mat_mm %<>% magrittr::extract(, keep_samples)
+          return(mat_mm)
+        }) %>%
+        setNames(keep)
+    }
+    
+    # drop NAs
+    pseudobulks %<>% magrittr::extract(!is.na(.))
+    
+    # also filter out cell types with no retained genes
+    min_dim = map(pseudobulks, as.data.frame) %>% map(nrow)
+    pseudobulks %<>% magrittr::extract(min_dim > 1)
+    
+    # also filter out types without replicates
+    min_repl = map_int(pseudobulks, ~ {
+      # make sure we have a data frame a not a vector
+      tmp = as.data.frame(.)
+      targets = data.frame(group_sample = colnames(tmp)) %>%
+        mutate(group = gsub(".*\\:", "", group_sample))
+      if (n_distinct(targets$group) == 1)
+        return(as.integer(0))
+      min(table(targets$group))
+    })
+    pseudobulks %<>% magrittr::extract(min_repl >= min_reps)
+    return(pseudobulks)
+  }
+  
+  # Run a DE analysis within each cell type using Seurat
+  
+  pseudobulk_de = function(input, 
+                           meta = NULL, 
+                           replicate_col = replicate,
+                           cell_type_col = comparison_level,
+                           label_col = label,
+                           min_cells = min_num_cells,
+                           min_reps = min_num_reps,
+                           min_features = min_num_features,
+                           de_family = 'pseudobulk',
+                           de_method = 'limma',
+                           de_type = 'LRT',
+                           subdivide = subdivide_data) {
+    # check args
+    if (de_method == 'limma') {
+      if (de_type != 'voom') {
+        # change default type to use
+        de_type = 'trend'  
+      }
+    }
+    
+    # get the pseudobulks list
+    pseudobulks = to_pseudobulk(
+      input = input,
+      meta = meta,
+      replicate_col = replicate_col,
+      cell_type_col = cell_type_col,
+      label_col = label_col,
+      min_cells = min_cells,
+      min_reps = min_reps,
+      min_features = min_features,
+      external = T,
+      subdivide = subdivide
+    )
+    
+    results = map(pseudobulks, function(x) {
+      # create targets matrix
+      targets = data.frame(group_sample = colnames(x)) %>%
+        mutate(group = gsub(".*\\:", "", group_sample))
+      ## optionally, carry over factor levels from entire dataset
+      if (is.factor(meta$label)) {
+        targets$group %<>% factor(levels = levels(meta$label))
+      }
+      # Need to set to higher values if more than 2 contrasts variables
+      if (n_distinct(targets$group) > 100000)
+        return(NULL)
+      # create design
+      
+      ### Possible Code for Design Formula (dm.formula) ###
+      dm.formula <- as.formula(paste("~0", paste(c("group")), sep="+", collapse="+"))
+      #design = model.matrix(~ group, data = targets)
+      design = model.matrix(dm.formula, data = targets)
+      
+      DE = switch(de_method,
+                  edgeR = {
+                    tryCatch({
+                      y = DGEList(counts = x, group = targets$group) %>%
+                        calcNormFactors(method = 'TMM') %>%
+                        estimateDisp(design)
+                      test = switch(de_type,
+                                    QLF = {
+                                      fit = glmQLFit(y, design)
+                                      test = glmQLFTest(fit, coef = -1)
+                                    },
+                                    LRT = {
+                                      fit = glmFit(y, design = design)
+                                      test = glmLRT(fit)
+                                    })
+                      res = topTags(test, n = Inf) %>%
+                        as.data.frame() %>%
+                        rownames_to_column('gene') %>%
+                        # flag metrics in results
+                        mutate(de_family = 'pseudobulk',
+                               de_method = de_method,
+                               de_type = de_type)
+                    }, error = function(e) {
+                      message(e)
+                      data.frame()
+                    })
+                  },
+                  DESeq2 = {
+                    tryCatch({
+                      dds = DESeqDataSetFromMatrix(countData = x,
+                                                   colData = targets,
+                                                   design = ~ group)
+                      dds = switch(de_type,
+                                   Wald = {
+                                     dds = try(DESeq(dds,
+                                                     test = 'Wald',
+                                                     fitType = 'parametric',
+                                                     sfType = 'poscounts',
+                                                     betaPrior = F))
+                                   },
+                                   LRT = {
+                                     dds = try(DESeq(dds,
+                                                     test = 'LRT',
+                                                     reduced = ~ 1,
+                                                     fitType = 'parametric',
+                                                     sfType = 'poscounts',
+                                                     betaPrior = F))
+                                   }
+                      )
+                      res = results(dds)
+                      # write
+                      res = as.data.frame(res) %>%
+                        mutate(gene = rownames(x)) %>%
+                        # flag metrics in results
+                        mutate(de_family = 'pseudobulk',
+                               de_method = de_method,
+                               de_type = de_type)
+                    }, error = function(e) {
+                      message(e)
+                      data.frame()
+                    })
+                  },
+                  limma = {
+                    tryCatch({
+                      x = switch(de_type,
+                                 trend = {
+                                   trend_bool = T
+                                   dge = DGEList(as.matrix(x), group = targets$group)
+                                   dge = calcNormFactors(dge)
+                                   x = new("EList")
+                                   x$E = cpm(dge, log = TRUE, prior.count = 3)
+                                   x
+                                 },
+                                 voom = {
+                                   counts = all(as.matrix(x) %% 1 == 0)
+                                   if (counts) {
+                                     trend_bool = F
+                                     x = voom(as.matrix(x), design)
+                                     x
+                                   }
+                                 })
+                      
+                      # get fit
+                      #Run Contrasts                    
+                      
+                      contrasts <- unlist(contrasts %>% strsplit("-") %>% 
+                                            lapply(function (x) paste("group",x,sep="")) %>% 
+                                            lapply(paste, collapse = "-"))
+                      
+                      cm <- makeContrasts(contrasts = contrasts, levels=design)
+                      
+                      fit2 = lmFit(x, design) %>% contrasts.fit(cm) %>%
+                        eBayes(trend = trend_bool, robust = trend_bool)
+                      
+                      logFC = fit2$coefficients
+                      colnames(logFC)=paste(colnames(logFC),"logFC",sep="_")
+                      tstat = fit2$t
+                      colnames(tstat)=paste(colnames(tstat),"tstat",sep="_")
+                      FC = 2^fit2$coefficients
+                      FC = ifelse(FC<1,-1/FC,FC)
+                      colnames(FC)=paste(colnames(FC),"FC",sep="_")
+                      pvalall=fit2$p.value
+                      colnames(pvalall)=paste(colnames(pvalall),"pval",sep="_")
+                      pvaladjall=apply(pvalall,2,function(x) p.adjust(x,"BH"))
+                      colnames(pvaladjall)=paste(colnames(fit2$coefficients),"adjpval",sep="_")
+                      
+                      ### Original Code - Start ###        
+                      
+                      ## format the results
+                      #res = fit2 %>%
+                      ## extract all coefs except intercept
+                      #topTable(number = Inf, coef = NULL) %>%
+                      #rownames_to_column('gene') %>%
+                      ## flag metrics in results
+                      #mutate(
+                      #  de_family = 'pseudobulk',
+                      #  de_method = de_method,
+                      #  de_type = de_type)
+                      
+                      ### Original Code - End ### 
+                      
+                      res=as.data.frame(cbind(FC, logFC, tstat, pvalall, pvaladjall))
+                      
+                      res$Gene <- rownames(res)
+                      
+                      res <- res %>% mutate(
+                        de_family = 'pseudobulk',
+                        de_method = de_method,
+                        de_type = de_type)
+                      
+                    }, error = function(e) {
+                      message(e)
+                      data.frame()
+                    })
+                  }
+      )
+    })
+    results %<>% bind_rows(.id = 'cell_type')
+    results <- results %>% dplyr::select("Gene", everything())
+  }
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  DE <- pseudobulk_de(so)
+  
+  return(DE)
+}
diff --git a/R/Violin_Plots_by_Metadata.R b/R/Violin_Plots_by_Metadata.R
new file mode 100644
index 0000000..7d78faf
--- /dev/null
+++ b/R/Violin_Plots_by_Metadata.R
@@ -0,0 +1,268 @@
+# This code comes from NIDAP 'Violin Plots by Metadata Column [scRNA-seq][CCBR]' code template
+# Template Manager https://nidap.nih.gov/workspace/vector/templates/ri.vector.main.template.1011399a-426d-4aa1-bb3f-4430e97ea9ec
+# Documentation https://nidap.nih.gov/workspace/notepad/view/ri.notepad.main.notepad.fbef08da-93ba-4301-aac7-2545631aad4f
+# splitFacet helper function comes from https://stackoverflow.com/questions/30510898/split-facet-plot-into-list-of-plots/52225543
+
+#' @title Violin Plot by Metadata
+#' @description Returns a violin plot of gene expression data that is partitioned by user defined metadata column
+#' @details Takes in a list of genes inputted by the user, displays gene expression information in particular slot-assay with (optional) outliers removed
+#' 
+#' @param seurat_object Seurat-class object
+#' @param assay Name of the assay to extract gene expression data from
+#' @param slot Name of the slot within the assay to extract gene expression data from
+#' @param ident_of_interest Split violin plot based on a column from the seurat object metadata
+#' @param groups_of_interest Include only a specific subset from ident_of_interest
+#' @param genes_of_interest Genes to visualize on the violin plot
+#' @param filter_outliers Filter outliers from the data (TRUE/FALSE)
+#' @param scale_data Scale data from 0 to 1 (TRUE/FALSE)
+#' @param log_scale_data Transform data onto a log10 scale (TRUE/FALSE)
+#' @param reorder_ident Numeric data will be ordered naturally by default. Toggling this option will order the groups to match the group list if non-numeric, and will have no effect if otherwise.
+#' @param rename_ident Give alternative names to ident_of_interest displayed on the graph
+#' @param ylimit Y-axis limit
+#' @param plot_style Choose between grid, labeled, and row
+#' @param outlier_low_lim Filter lower bound outliers (default = bottom 10 percentile)
+#' @param outlier_up_lim Filter upper bound outliers (default = bottom 90 percentile)
+#' @param jitter_points Scatter points on the plot (TRUE/FALSE)
+#' @param jitter_width Set spread of jittered points 
+#' @param jitter_dot_size Set size of individual points
+#' @param print_outliers Print outliers as points in your graph that may be redundant to jitter 
+
+#' @import Seurat 
+#' @import reshape2
+#' @import cowplot
+#' @import rlang
+#' @import ggplot2
+#'   
+#' @export
+
+#' @return violin ggplot2 object
+
+ViolinPlot <- function(so, 
+                       assay = "SCT", 
+                       slot = "scale.data", 
+                       ident_of_interest, 
+                       groups_of_interest, 
+                       genes_of_interest,
+                       filter_outliers = FALSE, 
+                       scale_data = TRUE, 
+                       log_scale_data = FALSE, 
+                       reorder_ident = TRUE,
+                       rename_ident = "", 
+                       ylimit = 0, 
+                       plot_style = "grid", 
+                       outlier_low_lim = 0.1, 
+                       outlier_up_lim = 0.9, 
+                       jitter_points = FALSE,
+                       jitter_width = 0.05, 
+                       jitter_dot_size = 0.5, 
+                       print_outliers = TRUE){
+
+  ## ---------------##
+  ## Error Messages ##
+  ## ---------------##
+  
+  gene_filter <- genes_of_interest %in% rownames(x = GetAssayData(object = so, slot = slot, assay = assay))
+  genes_of_interest <- genes_of_interest[gene_filter]
+  missing_genes <- genes_of_interest[!gene_filter]
+  if(length(missing_genes) > 0){
+    cat("The following genes are missing from the dataset:\n")
+    print(missing_genes)
+  }
+  
+  if(length(genes_of_interest) == 0){
+    stop("No query genes were found in the dataset.")
+  }
+  
+  if(!ident_of_interest %in% colnames(so@meta.data)){
+    colnames(so@meta.data) <- gsub("\\.","_",colnames(so@meta.data))
+    if(!ident_of_interest %in% colnames(so@meta.data)){
+      stop("Unable to find ident of interest in metadata.")
+    }
+  }
+  
+  group_filter <- groups_of_interest %in% so@meta.data[[ident_of_interest]]
+  groups_of_interest <- groups_of_interest[group_filter]
+  missing_groups <- groups_of_interest[!group_filter]
+  if(length(missing_groups) > 0){
+    cat("The following groups are missing from the selected ident:\n")
+    print(missing_groups)
+  }
+  
+  if(length(groups_of_interest) == 0){
+    stop("No groups were found in the selected ident.")
+  }
+  
+  if(rename_ident %in% c("Gene","Expression","scaled")){
+    stop("New ident name cannot be one of Gene, Expression, or scaled.")
+  }
+  
+  ## ---------------- ##
+  ## Helper Functions ##
+  ## ---------------- ##
+  
+  splitFacet <- function(x){
+    facet_vars <- names(x$facet$params$facets)         # 1
+    x$facet    <- ggplot2::ggplot()$facet              # 2
+    datasets   <- split(x$data, x$data[facet_vars])    # 3
+    new_plots  <- lapply(datasets,function(new_data) { # 4
+      x$data <- new_data
+      x})
+  }
+  
+  # from rapportools
+  is.empty <- function(x, trim = TRUE, ...) {
+    if (length(x) <= 1) {
+      if (is.null(x))
+        return (TRUE)
+      if (length(x) == 0)
+        return (TRUE)
+      if (is.na(x) || is.nan(x))
+        return (TRUE)
+      if (is.character(x) && nchar(ifelse(trim, trim.space(x), x)) == 0)
+        return (TRUE)
+      if (is.logical(x) && !isTRUE(x))
+        return (TRUE)
+      if (is.numeric(x) && x == 0)
+        return (TRUE)
+      return (FALSE)
+    } else
+      sapply(x, is.empty, trim = trim, ...)
+  }
+  
+  # from rapportools
+  trim.space <- function(x, what = c('both', 'leading', 'trailing', 'none'), space.regex = '[:space:]', ...){
+    if (missing(x))
+      stop('nothing to trim spaces to =(')
+    re <- switch(match.arg(what),
+                 both     = sprintf('^[%s]+|[%s]+$', space.regex, space.regex),
+                 leading  = sprintf('^[%s]+', space.regex),
+                 trailing = sprintf('[%s]+$', space.regex),
+                 none     = {
+                   return (x)
+                 })
+    vgsub(re, '', x, ...)
+  }
+  
+  # from rapportools
+  vgsub <- function(pattern, replacement, x, ...){
+    for(i in 1:length(pattern))
+      x <- gsub(pattern[i], replacement[i], x, ...)
+    x
+  }
+  
+  remove_outliers_func <- function(x, na.rm = TRUE){
+    qnt <- quantile(x, probs=c(outlier_low_lim,outlier_up_lim), na.rm = na.rm)
+    H <- 1.5*IQR(x, na.rm = na.rm)
+    y <- x
+    y[x < (qnt[1] - H)] <- NA
+    y[x > (qnt[2] + H)] <- NA
+    y
+  }
+  
+  ## --------------- ##
+  ## Main Code Block ##
+  ## --------------- ##
+  
+  # deal with limits
+  if(ylimit == 0){
+    ylimit <- NULL
+  }
+  
+  #set idents
+  if("active.ident" %in% slotNames(so)){
+    new_idents = as.factor(so@meta.data[[ident_of_interest]])
+    names(new_idents) = names(so@active.ident)
+    so@active.ident <- as.factor(vector())
+    so@active.ident <- new_idents
+    so.sub = subset(so, ident = groups_of_interest)
+  } else {
+    new_idents = as.factor(so@meta.data[[ident_of_interest]])
+    names(sample_name)=so@meta.data[["Barcode"]]
+    so@active.ident <- as.factor(vector())
+    so@active.ident <- new_idents
+    so.sub = subset(so, ident = groups_of_interest)
+  }
+  
+  DefaultAssay(object = so) <- assay
+  data <- FetchData(object = so.sub, vars = genes_of_interest, slot = slot)
+  
+  data[[ident_of_interest]] <- so.sub@meta.data[row.names(data),ident_of_interest]
+  
+  df.melt <- melt(data)
+  
+  if(!is.empty(rename_ident)){
+    ident_of_interest <- rename_ident
+  }
+  colnames(df.melt) <- c(ident_of_interest,"Gene","Expression")
+  
+  #check to see if ident of interest looks numeric
+  if(suppressWarnings(all(!is.na(as.numeric(as.character(df.melt[[ident_of_interest]])))))){
+    ident.values <- strtoi(df.melt[[ident_of_interest]])
+    ident.levels <- unique(ident.values)[order(unique(ident.values))]
+    df.melt[[ident_of_interest]] <- factor(ident.values, levels = ident.levels)
+  }else if(reorder_ident){
+    # if non-numeric, place in order of groups of interests
+    df.melt[[ident_of_interest]] <- factor(df.melt[[ident_of_interest]], levels = groups_of_interest)        
+  }
+  
+  # Filter outliers
+  if(filter_outliers){
+    for(gene in genes_of_interest){
+      for(group in groups_of_interest){
+        current.ind <- which(df.melt[["Gene"]] == gene & df.melt[[ident_of_interest]] == group)
+        df.melt[current.ind,"Expression"] <- remove_outliers_func(df.melt[current.ind,"Expression", drop = TRUE])
+      }
+    }
+  }
+  
+  # Scale data to y limit
+  if(scale_data){
+    expression_data = "scaled"
+    axis.title.y = "Expression (scaled)"
+    ylimit <- ylimit %||% 1
+    df.melt <- df.melt %>% group_by(Gene) %>% mutate(scaled = scales::rescale(Expression, to=c(0,ylimit)))
+  }else{
+    expression_data <- axis.title.y <- "Expression"
+  }
+  
+  g <- ggplot(df.melt, aes_string(x=ident_of_interest, y=expression_data)) +
+    geom_violin(aes_string(fill = ident_of_interest), scale="width", trim = FALSE, show.legend = FALSE) + 
+    #geom_jitter(height = 0, width = 0.05, size=0.1) +
+    theme_classic() + 
+    #scale_fill_manual(values=cols) + 
+    labs(y=axis.title.y) +
+    theme(strip.text.y = element_text( 
+      color="blue", face="bold.italic", angle = -90))
+  
+  
+  if(!is.null(ylimit)){
+    g <- g + ylim(0,ylimit)
+  }
+  
+  if(jitter_points){
+    g <- g + geom_jitter(height = 0, width = jitter_width, size=jitter_dot_size)
+  }
+  
+  if(log_scale_data){
+    g <- g + scale_y_log10()
+  }
+  
+  # Plot after jitter if wanted
+  g <- g + geom_boxplot(width=0.1, fill="white", outlier.shape = ifelse(print_outliers,19,NA))
+  
+  # Plot styles
+  ncol = ceiling(length(unique(df.melt$Gene))^0.5)
+  nrow = ceiling(length(unique(df.melt$Gene)) / ncol)
+  if(plot_style == "rows"){
+    g <- g + facet_grid(rows = vars(Gene))
+  }else{
+    g <- g + facet_wrap(~Gene, nrow = nrow, ncol = ncol)
+    if(plot_style == "labeled"){
+      plots <- splitFacet(g)
+      plots <- lapply(seq_along(plots), function(i) plots[[i]] + ggtitle(genes_of_interest[i]) + theme(plot.title = element_text(hjust = 0.5)) )
+      g <- plot_grid(plotlist = plots, nrow = nrow, ncol = ncol, labels = LETTERS[seq( from = 1, to = length(plots) )])
+    }
+  }
+  
+  return(g)
+}
diff --git a/SCWorkflow.Rproj b/SCWorkflow.Rproj
new file mode 100644
index 0000000..21a4da0
--- /dev/null
+++ b/SCWorkflow.Rproj
@@ -0,0 +1,17 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
diff --git a/tests/testthat/fixtures/MS_Levels_demo.csv b/tests/testthat/fixtures/MS_Levels_demo.csv
new file mode 100644
index 0000000..c066f09
--- /dev/null
+++ b/tests/testthat/fixtures/MS_Levels_demo.csv
@@ -0,0 +1,3 @@
+﻿level_1,level_2
+Pan_Tcells-CD4,CD4-Tregs
+Pan_Tcells-CD8,
diff --git a/tests/testthat/fixtures/Marker_Table_demo.csv b/tests/testthat/fixtures/Marker_Table_demo.csv
new file mode 100644
index 0000000..793ce1f
--- /dev/null
+++ b/tests/testthat/fixtures/Marker_Table_demo.csv
@@ -0,0 +1,19 @@
+﻿CD8_T,CD4_T,Tregs,Macrophages,M1,M2,Neutrophils,Monocytes,cDCs,pDC,B_cells,NKs
+Cd3e,Cd3e,Cd3e,Itgam,Cd38,Cd38_neg,Ly6g,Itgam,Ly6g_neg,Ccr9,Cd79a,Cd3e_neg
+Cd8a,Cd4,Cd4,Ly6g_neg,Mrc1_neg,Mrc1,Ly6c1_neg,Ly6c1,Adgre1_neg,Siglech,Fcmr,Ncr1
+Cd3d,Foxp3_neg,Foxp3,Adgre1,Nos2,Cd163,Adgre1_neg,Ly6g_neg,Itgax,Cox6a2,,Ifng
+Cd3g,Cd3d,Cd3d,Cd68,Sell_neg,,S100a9,Vcan,Cd209a,Klk1,,Klrb1c
+Cd4_neg,Cd3g,Cd8a_neg,Siglec1,Cd40,,S100a8,Fn1,Cd24a,,,
+Foxp3_neg,Cd8a_neg,Il2ra,Csf1r,Cd86,,Il1b,Ccr2,H2-Ab1,,,
+Gzmk,Prf1_neg,,Apoe,,,G0s2,Csf1r,,,,
+,,,Mafb,,,Csf3r,Mafb,,,,
+,,,Cx3cr1,,,,Nr4a1,,,,
+,,,Nr4a1_neg,,,,Lgals3,,,,
+,,,F13a1,,,,,,,,
+,,,Lgals3,,,,,,,,
+,,,Cxcl9,,,,,,,,
+,,,Cxcl10,,,,,,,,
+,,,Sell_neg,,,,,,,,
+,,,Arg1,,,,,,,,
+,,,Mmp13,,,,,,,,
+,,,Mmp12,,,,,,,,
diff --git a/tests/testthat/test-Color_by_Genes_Automatic.R b/tests/testthat/test-Color_by_Genes_Automatic.R
new file mode 100644
index 0000000..2a3a810
--- /dev/null
+++ b/tests/testthat/test-Color_by_Genes_Automatic.R
@@ -0,0 +1,14 @@
+test_that("Color by Genes Automatic returns correct class of figures", {
+  
+  so_demo <- readRDS(test_path("fixtures", "SO_moduleScore.rds"))
+  marker_tab_demo <- read.csv(test_path("fixtures", "Marker_Table_demo.csv"))
+  
+  Cbg_demo <- color_by_genes(SO = so_demo, 
+                             samples_to_include = c("1_E13","2_E15","3_Newborn","4_Adult"), 
+                             samples_to_display = c("1_E13","2_E15","3_Newborn","4_Adult"), 
+                             marker_list = marker_tab_demo,
+                             cells_of_interest = c("CD8_T","CD4_T","Tregs","Macrophages"))
+  
+  expected_elements <- c("gtable", "gTree", "grob", "gDesc")
+  expect_setequal(class(Cbg_demo), expected_elements)
+})
diff --git a/tests/testthat/test-Harmony_Batch_Correction.R b/tests/testthat/test-Harmony_Batch_Correction.R
new file mode 100644
index 0000000..0ec15fe
--- /dev/null
+++ b/tests/testthat/test-Harmony_Batch_Correction.R
@@ -0,0 +1,11 @@
+test_that("Harmony returns seurat object with adjusted embeddings", {
+  
+  so_demo <- readRDS(test_path("fixtures", "SO_moduleScore.rds"))
+  
+  so_harmonized <- harmony_batch_correct(so = so_demo,
+                        genes_to_add = c("Epcam"),
+                        group.by.vars = c("orig.ident"))
+  
+  expected_elements = c("pca","umap", "tsne", "harmony")
+  expect_setequal(names(so_harmonized@reductions), expected_elements)
+})
diff --git a/tests/testthat/test-ModuleScore.R b/tests/testthat/test-ModuleScore.R
new file mode 100644
index 0000000..8807da5
--- /dev/null
+++ b/tests/testthat/test-ModuleScore.R
@@ -0,0 +1,21 @@
+test_that("ModuleScore returns metadata with scores and cell calls", {
+  
+  so_demo <- readRDS(test_path("fixtures", "SO_moduleScore.rds"))
+  marker_tab_demo <- read.csv(test_path("fixtures", "Marker_Table_demo.csv"))
+  levels_df_demo = read.csv(test_path("fixtures", "MS_Levels_demo.csv"))
+  
+  modscore_demo <- ModuleScore(SO = so_demo, 
+              sample_names = c("1_E13","2_E15","3_Newborn","4_Adult"),
+              sample_to_display <- c("1_E13","2_E15","3_Newborn","4_Adult"),
+              geneset_dataframe = marker_tab_demo,
+              proteins_presence <- FALSE,
+              celltypes_to_analyze <- c("CD8_T","CD4_T","Tregs","Macrophages","M1","M2","Neutrophils","Monocytes","cDCs","pDC","B_cells","NKs"),
+              manual_threshold <- c(0),
+              general_class <- c("CD8_T","CD4_T","Tregs","Macrophages","M1","M2","Neutrophils","Monocytes","cDCs","pDC","B_cells","NKs"),
+              multi_level_class <- FALSE,
+              levels_dataframe = levels_df_demo,
+              nbins = 10)
+  
+  expected_elements <- c("Likely_CellType","CD8_T","CD4_T","Tregs","Macrophages","M1","Neutrophils","Monocytes","cDCs","pDC")
+  expect_setequal(colnames(modscore_demo[[2]]@meta.data)[30:39], expected_elements)
+})
diff --git a/tests/testthat/test-Pseudobulk_DEG.R b/tests/testthat/test-Pseudobulk_DEG.R
new file mode 100644
index 0000000..ae57e65
--- /dev/null
+++ b/tests/testthat/test-Pseudobulk_DEG.R
@@ -0,0 +1,11 @@
+test_that("Pseudobulk DEG returns table of differentially expressed genes", {
+  so_demo <- readRDS(test_path("fixtures", "SO_moduleScore.rds"))
+  
+  pseudo_res <- Pseudobulk_DEG(so = so_demo, contrasts = c("CD4_T-Tregs"), replicate = 'orig.ident', comparison_level = 'Likely_CellType',
+                 label = "Likely_CellType")
+  
+  expected_elements = c("Gene","cell_type","groupCD4_T-groupTregs_FC","groupCD4_T-groupTregs_logFC",
+                        "groupCD4_T-groupTregs_tstat","groupCD4_T-groupTregs_pval",
+                        "groupCD4_T-groupTregs_adjpval","de_family","de_method","de_type")
+  expect_setequal(colnames(pseudo_res), expected_elements)
+})
diff --git a/tests/testthat/test-Violin_Plots_by_Metadata.R b/tests/testthat/test-Violin_Plots_by_Metadata.R
new file mode 100644
index 0000000..901d8a0
--- /dev/null
+++ b/tests/testthat/test-Violin_Plots_by_Metadata.R
@@ -0,0 +1,12 @@
+test_that("Violin plot returns ggplot object", {
+  
+  so_demo <- readRDS(test_path("fixtures", "SO_moduleScore.rds"))
+  genes_demo <- read.csv(test_path("fixtures", "Marker_Table_demo.csv"))[,1][1:4]
+  
+  violin_res <- ViolinPlot(so = so_demo, ident_of_interest = "orig_ident", 
+             groups_of_interest = c("1_E13","2_E15","3_Newborn","4_Adult"),
+             genes_of_interest = genes_demo)
+  
+  expected_elements = c("gg","ggplot")
+  expect_setequal(class(violin_res), expected_elements)
+})