From 77522f640e4d555c2d0ab26d74f70f8bc983017f Mon Sep 17 00:00:00 2001 From: gabrielodom Date: Tue, 29 Mar 2022 17:07:39 -0400 Subject: [PATCH] Making progress on Issue #14; also related to Issue #18 and Issue #19 --- R/lmmTest.R | 41 +++++++++++++++++++++++---------------- R/lmmTestAllRegions.R | 45 +++++++++++++++++++++++++++---------------- 2 files changed, 52 insertions(+), 34 deletions(-) diff --git a/R/lmmTest.R b/R/lmmTest.R index 00c04d5..d319ca8 100644 --- a/R/lmmTest.R +++ b/R/lmmTest.R @@ -80,11 +80,13 @@ lmmTest <- function(betaOne_df, pheno_df, contPheno_char, covariates_char, outLogFile = NULL){ # browser() + ### Inputs ### modelType <- match.arg(modelType) arrayType <- match.arg(arrayType) genome <- match.arg(genome) - ### Transpose betaOne_df from wide to long ### + + ### Wrangle ### betaOne_df$ProbeID <- row.names(betaOne_df) betaOneTransp_df <- reshape( betaOne_df, @@ -95,15 +97,16 @@ lmmTest <- function(betaOne_df, pheno_df, contPheno_char, covariates_char, timevar = "Sample" ) - ### Calculate M values ### + # Calculate M values betaOneTransp_df$Mvalue <- log2( betaOneTransp_df$beta / (1 - betaOneTransp_df$beta) ) - ### Merge transposed beta matrix with phenotype ### + # Merge transposed beta matrix with phenotype betaOnePheno_df <- merge(betaOneTransp_df, pheno_df, by = "Sample") - # regionNames + + ### Setup ### regionName <- NameRegion( OrderCpGsByLocation( CpGs_char = betaOne_df$ProbeID, @@ -114,17 +117,22 @@ lmmTest <- function(betaOne_df, pheno_df, contPheno_char, covariates_char, output = "dataframe" ) ) - - modelFormula_char <- .MakeLmmFormula( - contPheno_char, covariates_char, modelType - ) - + if(!is.null(outLogFile)){ cat(paste0("Analyzing region ", regionName, ". \n")) } else { message("Analyzing region ", regionName, ". \n") } + modelFormula_char <- .MakeLmmFormula( + contPheno_char = contPheno_char, + covariates_char = covariates_char, + modelType = modelType + ) + + + ### Analysis ### + # Run f <- tryCatch( { suppressMessages( @@ -134,7 +142,7 @@ lmmTest <- function(betaOne_df, pheno_df, contPheno_char, covariates_char, error = function(e){NULL} ) - + # Check if(is.null(f)){ ps_df <- data.frame( @@ -170,22 +178,21 @@ lmmTest <- function(betaOne_df, pheno_df, contPheno_char, covariates_char, } - ### split regionName into chrom, start, end + + ### Return ### + # split regionName into chrom, start, end chrom <- sub(":.*", "", regionName) range <- sub("c.*:", "", regionName) start <- sub("-\\d*", "", range) end <- sub("\\d*.-", "", range) - ### Return results ### + # results nCpGs <- nrow(betaOne_df) - result <- cbind( - chrom, start, end, nCpGs, - ps_df, + cbind( + chrom, start, end, nCpGs, ps_df, stringsAsFactors = FALSE ) - result - } diff --git a/R/lmmTestAllRegions.R b/R/lmmTestAllRegions.R index 516171d..b593b46 100644 --- a/R/lmmTestAllRegions.R +++ b/R/lmmTestAllRegions.R @@ -119,14 +119,22 @@ lmmTestAllRegions <- function( nCores_int = 1L, ...){ # browser() - - warnLvl <- options()$warn - options(warn = 1) - - ### Setup ### + + + ### Inputs ### modelType <- match.arg(modelType) - arrayType <- match.arg(arrayType) + + # Available manifest files are: + # "EPIC.hg19.manifest" "EPIC.hg38.manifest" + # "HM450.hg19.manifest" "HM450.hg38.manifest" genome <- match.arg(genome) + arrayType <- match.arg(arrayType) + manifest <- paste( + switch(arrayType, "450k" = "HM450", "EPIC" = "EPIC"), + genome, "manifest", + sep = "." + ) + CpGlocations.gr <- ImportSesameData(manifest) if (is(betas, "matrix")){ beta_df <- as.data.frame(betas) @@ -141,6 +149,11 @@ lmmTestAllRegions <- function( CpGnames <- rownames(beta_df) + + ### Setup Log File ### + warnLvl <- options()$warn + options(warn = 1) + writeLog_logi <- !is.null(outLogFile) if(writeLog_logi){ @@ -158,13 +171,14 @@ lmmTestAllRegions <- function( } - ### Split Data by Region ### + ### Run mixed model for all the contiguous comethylated regions ### + # Split Data by Region coMethBetaDF_ls <- lapply( region_ls, function(x) beta_df[which(CpGnames %in% x), ] ) - - ### Run mixed model for all the contiguous comethylated regions ### + + # Apply cluster <- CreateParallelWorkers(nCores_int, ...) results_ls <- bplapply( @@ -177,11 +191,14 @@ lmmTestAllRegions <- function( modelType, genome, arrayType, - manifest_gr = NULL, + manifest_gr = CpGlocations.gr, ignoreStrand, outLogFile ) + + ### Close Log File ### + options(warn = warnLvl) if(writeLog_logi){ cat("\n") @@ -210,8 +227,7 @@ lmmTestAllRegions <- function( } - - ### Output results ### + ### Output results ### if (length(results_ls) > 0){ outDF <- do.call(rbind, results_ls) @@ -220,13 +236,8 @@ lmmTestAllRegions <- function( } - - options(warn = warnLvl) - if (is.null(outFile)){ - outDF - } else { message("writing results to ", outFile,".csv")