From 4055b7f4c1131ea1aecfd22c79b7f12f0b1efecf Mon Sep 17 00:00:00 2001 From: ttriche Date: Wed, 22 Feb 2017 12:06:28 -0800 Subject: [PATCH 01/12] add noobPipeline(rgSet) function to make my life easier --- DESCRIPTION | 2 +- R/preprocessNoob.R | 10 +++++++ inst/extData/GM12878.csv | 59 ++++++++++++++++++++++++++++++++++++++++ man/preprocessNoob.Rd | 8 +++++- 4 files changed, 77 insertions(+), 2 deletions(-) create mode 100644 inst/extData/GM12878.csv diff --git a/DESCRIPTION b/DESCRIPTION index 099c9e4..2f43211 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: minfi -Version: 1.21.5 +Version: 1.21.6 Title: Analyze Illumina Infinium DNA methylation arrays Description: Tools to analyze & visualize Illumina Infinium methylation arrays. Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("cre", "aut"), diff --git a/R/preprocessNoob.R b/R/preprocessNoob.R index 7d04740..6099044 100644 --- a/R/preprocessNoob.R +++ b/R/preprocessNoob.R @@ -204,3 +204,13 @@ normexp.get.xcs <- function(xcf, params){ xcf[,i] <- normexp.signal(as.numeric((pars[i,])), xcf[,i] ) # from limma return( xcf + params[[grep('offset', names(params), value=TRUE)]][1] ) } + +# a "best practices" baseline for reading IDATs +noobPipeline <- function(rgSet, pCutoff=0.01) { + pvals <- detectionP(rgSet) + grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) + is.na(assays(grSet)$Beta) <- pval[rownames(grSet),] >= pCutoff + grSet$predictedSex <- getSex(grSet)$predictedSex + metadata(grSet)$SNPs <- getSnpBeta(rgSet) + return(grSet) +} diff --git a/inst/extData/GM12878.csv b/inst/extData/GM12878.csv new file mode 100644 index 0000000..08ea5d8 --- /dev/null +++ b/inst/extData/GM12878.csv @@ -0,0 +1,59 @@ +"","seqnames","start","end","width","strand","score","genotype","ref","alt","U","M" +"rs3936238","chr1",4031586,4031586,1,"*",3,"A:A","A","G","G","A" +"rs877309","chr1",11412265,11412265,1,"*",2,"A:G","A","G","A","G" +"rs213028","chr1",21524764,21524764,1,"*",2,"C:T","C","T","C","T" +"rs11249206","chr1",25150569,25150569,1,"*",1,"T:T","C","T","T","C" +"rs654498","chr1",81945636,81945636,1,"*",1,"C:C","C","T","C","T" +"rs715359","chr1",175697791,175697791,1,"*",1,"C:C","T","C","C","T" +"rs2804694","chr1",179598456,179598456,1,"*",1,"A:A","G","A","A","G" +"rs6426327","chr1",244156434,244156434,1,"*",1,"A:A","G","A","A","G" +"rs966367","chr2",12065671,12065671,1,"*",3,"C:C","C","T","T","C" +"rs4331560","chr2",48938177,48938177,1,"*",3,"G:G","A","G","A","G" +"rs1510480","chr2",60678945,60678945,1,"*",2,"A:G","G","A","A","G" +"rs6546473","chr2",69113861,69113861,1,"*",1,"A:A","A","G","A","G" +"rs264581","chr2",159679609,159679609,1,"*",1,"A:A","A","G","A","G" +"rs939290","chr3",14633870,14633870,1,"*",3,"C:C","T","C","T","C" +"rs9839873","chr3",86744845,86744845,1,"*",3,"C:C","T","C","T","C" +"rs1520670","chr3",100455616,100455616,1,"*",3,"A:A","A","G","G","A" +"rs10936224","chr3",162385225,162385225,1,"*",2,"A:G","G","A",NA,NA +"rs10033147","chr4",13966228,13966228,1,"*",1,NA,"A","G",NA,NA +"rs2125573","chr4",128827282,128827282,1,"*",2,"T:C","T","C",NA,NA +"rs7660805","chr4",131263500,131263500,1,"*",1,"A:A","G","A","A","G" +"rs9292570","chr5",35036069,35036069,1,"*",3,"C:C","T","C","T","C" +"rs348937","chr5",112853576,112853576,1,"*",1,"T:T","C","T","T","C" +"rs1019916","chr5",146628552,146628552,1,"*",2,"G:A","G","A",NA,NA +"rs7746156","chr6",47238778,47238778,1,"*",2,"C:T","T","C",NA,NA +"rs9363764","chr6",68288763,68288763,1,"*",2,"G:A","G","A",NA,NA +"rs10457834","chr6",149084493,149084493,1,"*",3,"C:C","T","C","T","C" +"rs6982811","chr8",36666471,36666471,1,"*",1,NA,"T","C",NA,NA +"rs1484127","chr8",51888207,51888207,1,"*",1,"A:A","G","A","A","G" +"rs6471533","chr8",96539476,96539476,1,"*",2,"G:A","G","A",NA,NA +"rs472920","chr8",96917920,96917920,1,"*",1,NA,"A","G",NA,NA +"rs6991394","chr8",121851583,121851583,1,"*",3,NA,"C","T",NA,NA +"rs2385226","chr8",126751178,126751178,1,"*",2,"T:C","C","T",NA,NA +"rs4742386","chr9",7701758,7701758,1,"*",1,NA,"A","G",NA,NA +"rs1040870","chr9",84605357,84605357,1,"*",2,"T:C","C","T",NA,NA +"rs10796216","chr10",14731895,14731895,1,"*",3,"A:A","A","G","G","A" +"rs10882854","chr10",98629087,98629087,1,"*",1,"C:C","T","C","C","T" +"rs11034952","chr11",38745191,38745191,1,"*",2,"C:T","T","C","C","T" +"rs1945975","chr11",104804934,104804934,1,"*",2,"T:C","C","A","A","C" +"rs2468330","chr12",41484989,41484989,1,"*",2,"A:G","G","A","A","G" +"rs1495031","chr12",61784403,61784403,1,"*",2,"T:C","C","T","T","C" +"rs951295","chr15",43787115,43787115,1,"*",1,"G:G","A","G","G","A" +"rs2959823","chr15",74200584,74200584,1,"*",2,"A:G","G","A","A","G" +"rs1510189","chr16",54658025,54658025,1,"*",1,"T:T","T","C","T","C" +"rs1941955","chr18",33416836,33416836,1,"*",1,"T:T","T","C","T","C" +"rs2235751","chr20",1917934,1917934,1,"*",2,"A:G","A","G","A","G" +"rs845016","chr21",32920155,32920155,1,"*",3,"C:C","C","T","T","C" +"rs2032088","chr21",37399200,37399200,1,"*",1,"A:A","G","A","A","G" +"rs1467387","chr22",24261372,24261372,1,"*",2,"T:C","T","C","T","C" +"rs133860","chr22",24474760,24474760,1,"*",2,"T:C","C","T","T","C" +"rs739259","chr22",25686579,25686579,1,"*",2,"A:G","A","G","A","G" +"rs2857639","chr22",28385674,28385674,1,"*",2,"A:G","A","G","A","G" +"rs2208123","chr22",46593476,46593476,1,"*",3,"A:A","A","G","G","A" +"rs2521373","chrX",9436990,9436990,1,"*",2,"A:G","A","G",NA,NA +"rs798149","chrX",15795352,15795352,1,"*",3,"G:G","A","G","A","G" +"rs5926356","chrX",28157252,28157252,1,"*",2,"A:G","A","G",NA,NA +"rs5987737","chrX",114616977,114616977,1,"*",1,NA,"T","C",NA,NA +"rs1416770","chrX",145026857,145026857,1,"*",2,"T:C","T","C",NA,NA +"rs6626309","chrX",147012065,147012065,1,"*",3,NA,"C","T",NA,NA diff --git a/man/preprocessNoob.Rd b/man/preprocessNoob.Rd index 79c5855..d3d76b2 100644 --- a/man/preprocessNoob.Rd +++ b/man/preprocessNoob.Rd @@ -1,5 +1,6 @@ \name{preprocessNoob} \alias{preprocessNoob} +\alias{noobPipeline} \title{ The Noob/ssNoob preprocessing method for Infinium methylation microarrays. } @@ -10,6 +11,8 @@ \usage{ preprocessNoob(rgSet, offset = 15, dyeCorr = TRUE, verbose = TRUE, dyeMethod=c("single", "reference")) + +noobPipeline(rgSet, pCutoff = 0.01) } \arguments{ \item{rgSet}{An object of class \code{RGChannelSet}.} @@ -17,9 +20,11 @@ preprocessNoob(rgSet, offset = 15, dyeCorr = TRUE, verbose = TRUE, \item{dyeCorr}{Should dye correction be done?} \item{verbose}{Should the function be verbose?} \item{dyeMethod}{How should dye bias correction be done: use a single sample approach (ssNoob), or a reference array?} + \item{pCutoff}{(for noobPipeline) Above what detection pvalue should probes be marked NA? (0.01)} } \value{ - An object of class \code{MethylSet}. + For preprocessNoob, an object of class \code{MethylSet}. + For noobPipeline, an object of class \code{GenomicRatioSet} with SNP metadata. } \references{ TJ Triche, DJ Weisenberger, D Van Den Berg, PW Laird and KD Siegmund @@ -42,6 +47,7 @@ if (require(minfiData)) { ## RGsetEx.sub is a small subset of RGsetEx; ## only used for computational speed. MsetEx.sub.noob <- preprocessNoob(RGsetEx.sub) + grSet.sub.noob <- noobPipeline(RGsetEx.sub) } \dontrun{ if (require(minfiData)) { From a6f96a5818668a63639f05fe2843ac578f8dd8e9 Mon Sep 17 00:00:00 2001 From: ttriche Date: Wed, 22 Feb 2017 12:15:35 -0800 Subject: [PATCH 02/12] fix NAMESPACE to export noobPipeline() --- NAMESPACE | 2 +- R/preprocessNoob.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8b7ee4f..6b3b9ca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -57,7 +57,7 @@ export(getManifest, getProbeInfo, getManifestInfo, normalize.illumina.control, bgcorrect.illumina, preprocessRaw, preprocessIllumina, preprocessSWAN, preprocessQuantile, - preprocessFunnorm, preprocessNoob, + preprocessFunnorm, preprocessNoob, noobPipeline, RGChannelSet, RGChannelSetExtended, MethylSet, RatioSet, GenomicMethylSet, GenomicRatioSet, diff --git a/R/preprocessNoob.R b/R/preprocessNoob.R index 6099044..d04289e 100644 --- a/R/preprocessNoob.R +++ b/R/preprocessNoob.R @@ -207,7 +207,7 @@ normexp.get.xcs <- function(xcf, params){ # a "best practices" baseline for reading IDATs noobPipeline <- function(rgSet, pCutoff=0.01) { - pvals <- detectionP(rgSet) + pval <- detectionP(rgSet) grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) is.na(assays(grSet)$Beta) <- pval[rownames(grSet),] >= pCutoff grSet$predictedSex <- getSex(grSet)$predictedSex From 7d9d63015ba8eafb6bd0a413c1337b1d935d8b95 Mon Sep 17 00:00:00 2001 From: ttriche Date: Wed, 22 Feb 2017 12:17:19 -0800 Subject: [PATCH 03/12] don't try predicting sex, as it can fail --- R/preprocessNoob.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/preprocessNoob.R b/R/preprocessNoob.R index d04289e..ff6c66d 100644 --- a/R/preprocessNoob.R +++ b/R/preprocessNoob.R @@ -210,7 +210,6 @@ noobPipeline <- function(rgSet, pCutoff=0.01) { pval <- detectionP(rgSet) grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) is.na(assays(grSet)$Beta) <- pval[rownames(grSet),] >= pCutoff - grSet$predictedSex <- getSex(grSet)$predictedSex metadata(grSet)$SNPs <- getSnpBeta(rgSet) return(grSet) } From 1ac7121754f0e8e514467d240e4adb1a822a4ffb Mon Sep 17 00:00:00 2001 From: ttriche Date: Sun, 9 Apr 2017 11:10:35 -0700 Subject: [PATCH 04/12] adding some rudimentary support for SNP tracking --- DESCRIPTION | 9 +++++---- R/generics.R | 1 + R/grset.R | 17 +++++++++++++++++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2f43211..a16e032 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: minfi -Version: 1.21.6 +Version: 1.21.7 Title: Analyze Illumina Infinium DNA methylation arrays Description: Tools to analyze & visualize Illumina Infinium methylation arrays. Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("cre", "aut"), @@ -12,7 +12,7 @@ Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("cre", "aut"), person("Jean-Philippe", "Fortin", role = "ctb"), person("Tim", "Triche", role = "ctb"), person(c("Shan", "V."), "Andrews", role = "ctb")) -Depends: methods, +Depends: methods, BiocGenerics (>= 0.15.3), GenomicRanges, SummarizedExperiment (>= 1.1.6), @@ -28,7 +28,7 @@ Suggests: IlluminaHumanMethylation450kmanifest (>= 0.2.0), BiocStyle, knitr, rmarkdown -Imports: S4Vectors, +Imports: S4Vectors, GenomeInfoDb, Biobase (>= 2.33.2), IRanges, @@ -53,7 +53,7 @@ Imports: S4Vectors, grDevices, graphics, utils -Collate: generics.R +Collate: generics.R mset.R rset.R rgset.R @@ -91,3 +91,4 @@ BugReports: https://github.com/kasperdanielhansen/minfi/issues LazyData: yes biocViews: DNAMethylation, DifferentialMethylation, Epigenetics, Microarray, MethylationArray, MultiChannel, TwoChannel, DataImport, Normalization, Preprocessing, QualityControl +RoxygenNote: 6.0.1 diff --git a/R/generics.R b/R/generics.R index db4aa47..f1004d5 100644 --- a/R/generics.R +++ b/R/generics.R @@ -8,5 +8,6 @@ setGeneric("getManifest", function(object) standardGeneric("getManifest")) ## setGeneric("getLocations", function(object, ...) standardGeneric("getLocations")) setGeneric("preprocessMethod", function(object) standardGeneric("preprocessMethod")) setGeneric("getCN", function(object, ...) standardGeneric("getCN")) +setGeneric("getSNPs", function(object) standardGeneric("getSNPs")) setGeneric("convertArray", function(object, ...) standardGeneric("convertArray")) setGeneric("combineArrays", function(object1, object2, ...) standardGeneric("combineArrays")) diff --git a/R/grset.R b/R/grset.R index 66797a8..756a4b9 100644 --- a/R/grset.R +++ b/R/grset.R @@ -70,6 +70,23 @@ setMethod("getCN", signature(object = "GenomicRatioSet"), return(NULL) }) +# FIXME: enforce something a bit less lackadaisical? +setMethod("getSNPs", signature(object = "GenomicRatioSet"), + function (object) { + if (!all(colnames(object) %in% colnames(metadata(object)$SNPs))) { + missed <- setdiff(colnames(object), + colnames(metadata(object)$SNPs)) + stop("Error: columns for ", paste(missed, sep=", "), + " are missing from metadata(object)$SNPs.") + } + if ("SNPs" %in% names(metadata(object))) { + return(metadata(object)$SNPs[, colnames(object)]) + } else { + message("No SNPs found in your GenomicRatioSet's metadata.") + return(NULL) + } + }) + setMethod("mapToGenome", signature(object = "GenomicRatioSet"), function(object, ...) { object From f8a56b4d8556e5c359a452e304ec7ba44addf6b9 Mon Sep 17 00:00:00 2001 From: ttriche Date: Mon, 10 Apr 2017 13:39:33 -0700 Subject: [PATCH 05/12] Check to see if colnames(SNPs) == colnames(grset), else warn --- R/grset.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/grset.R b/R/grset.R index 756a4b9..a3e2a4d 100644 --- a/R/grset.R +++ b/R/grset.R @@ -79,10 +79,15 @@ setMethod("getSNPs", signature(object = "GenomicRatioSet"), stop("Error: columns for ", paste(missed, sep=", "), " are missing from metadata(object)$SNPs.") } - if ("SNPs" %in% names(metadata(object))) { + if ("SNPs" %in% names(metadata(object))) { + if (!identical(colnames(object), + colnames(metadata(object)$SNPs))) { + message("colnames(metadata(object)$SNPs) != colnames(object)") + message("You should probably fix this if you use these SNPs.") + } return(metadata(object)$SNPs[, colnames(object)]) } else { - message("No SNPs found in your GenomicRatioSet's metadata.") + message("No SNPs found in your GenomicRatioSet metadata().") return(NULL) } }) From 4509728bc8f25f4fff711df38a9382231eed386e Mon Sep 17 00:00:00 2001 From: ttriche Date: Tue, 11 Apr 2017 18:51:12 -0700 Subject: [PATCH 06/12] expose getSNPs() method --- NAMESPACE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 0f4f51d..b6213ce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,7 +48,7 @@ export(getManifest, getProbeInfo, getManifestInfo, IlluminaMethylationManifest, IlluminaMethylationAnnotation, getControlAddress, getRed, getGreen, getNBeads, getMeth, getUnmeth, - getBeta, getM, getCN, getMethSignal, + getBeta, getM, getCN, getSNPs, getMethSignal, getOOB, getSnpBeta, dropMethylationLoci, getLocations, getAnnotation, getAnnotationObject, From 09d9f40d00eb2daf92d415972a333c7f8e56a267 Mon Sep 17 00:00:00 2001 From: ttriche Date: Wed, 12 Apr 2017 15:41:55 -0700 Subject: [PATCH 07/12] add getSNPs method for RGChannelSet and export it --- NAMESPACE | 5 +++-- R/rgset.R | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b6213ce..d53e137 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -80,8 +80,9 @@ exportClasses(RGChannelSet, RGChannelSetExtended, GenomicMethylSet, GenomicRatioSet, IlluminaMethylationManifest, IlluminaMethylationAnnotation) -exportMethods(show, initialize, getBeta, getM, getCN, getMeth, getUnmeth, - getManifest, annotation, "annotation<-", preprocessMethod, +exportMethods(show, initialize, + getBeta, getM, getCN, getSNPs, getMeth, getUnmeth, getManifest, + annotation, "annotation<-", preprocessMethod, combine, mapToGenome, ratioConvert, bumphunter, pData, "pData<-", sampleNames, "sampleNames<-", featureNames, "featureNames<-", coerce) diff --git a/R/rgset.R b/R/rgset.R index 06d7028..1fa70b1 100644 --- a/R/rgset.R +++ b/R/rgset.R @@ -148,6 +148,12 @@ setMethod("getBeta", signature(object = "RGChannelSet"), callGeneric(object, ...) }) +# wrap getSnpBeta function, for RGChannelSet objects +setMethod("getSNPs", signature(object = "RGChannelSet"), + function (object) { + getSnpBeta(object) + }) + setMethod("mapToGenome", signature(object = "RGChannelSet"), function(object, ...) { object <- preprocessRaw(object) From 1a0c58255f10e3935d540fa35e17cd2455cf9cc6 Mon Sep 17 00:00:00 2001 From: ttriche Date: Sun, 16 Apr 2017 19:35:31 -0700 Subject: [PATCH 08/12] break out noobPipeline into its own function --- DESCRIPTION | 1 + R/noobPipeline.R | 14 ++++++++++++++ R/preprocessNoob.R | 9 --------- 3 files changed, 15 insertions(+), 9 deletions(-) create mode 100644 R/noobPipeline.R diff --git a/DESCRIPTION b/DESCRIPTION index a16e032..caf62e9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -74,6 +74,7 @@ Collate: generics.R preprocessQuantile.R preprocessNoob.R preprocessFunnorm.R + noobPipeline.R qc.R read.450k.R read.meth.R diff --git a/R/noobPipeline.R b/R/noobPipeline.R new file mode 100644 index 0000000..7b28e83 --- /dev/null +++ b/R/noobPipeline.R @@ -0,0 +1,14 @@ +# a "best practices" baseline for most studies +noobPipeline <- function(rgSet, pCutoff=0.01) { + pval <- detectionP(rgSet) + message("Running TCGA-style noob pipeline on ", ncol(rgSet), " samples...") + grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) + message("Masking probes with detection p-value > ", pCutoff, "...") + is.na(assays(grSet)$Beta) <- (pval[rownames(grSet),] >= pCutoff) + message("Dropping redundant \"M\" assay matrix...") + assays(grSet) <- assays(grSet)[c("Beta","CN")] + message("Placing SNP probe betas in metadata(grSet)$SNPs...") + metadata(grSet)$SNPs <- getSnpBeta(rgSet) + message("Done.") + return(grSet) +} diff --git a/R/preprocessNoob.R b/R/preprocessNoob.R index ff6c66d..7d04740 100644 --- a/R/preprocessNoob.R +++ b/R/preprocessNoob.R @@ -204,12 +204,3 @@ normexp.get.xcs <- function(xcf, params){ xcf[,i] <- normexp.signal(as.numeric((pars[i,])), xcf[,i] ) # from limma return( xcf + params[[grep('offset', names(params), value=TRUE)]][1] ) } - -# a "best practices" baseline for reading IDATs -noobPipeline <- function(rgSet, pCutoff=0.01) { - pval <- detectionP(rgSet) - grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) - is.na(assays(grSet)$Beta) <- pval[rownames(grSet),] >= pCutoff - metadata(grSet)$SNPs <- getSnpBeta(rgSet) - return(grSet) -} From 142f42ef2688cb7cfb29552a233aa178de10b0d9 Mon Sep 17 00:00:00 2001 From: ttriche Date: Thu, 20 Apr 2017 12:16:22 -0700 Subject: [PATCH 09/12] add unit tests for noobPipeline and getSNPs --- inst/unitTests/test_noobPipeline.R | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 inst/unitTests/test_noobPipeline.R diff --git a/inst/unitTests/test_noobPipeline.R b/inst/unitTests/test_noobPipeline.R new file mode 100644 index 0000000..5f18441 --- /dev/null +++ b/inst/unitTests/test_noobPipeline.R @@ -0,0 +1,7 @@ +test_noobPipeline <- function() { + stopifnot(require(minfiData)) + grSetEx <- noobPipeline(updateObject(RGsetEx)) + checkIdentical(getSNPs(grSetEx), getSNPs(updateObject(RGsetEx))) + checkIdentical(preprocessMethod(grSetEx), + c(mu.norm="Noob, dyeCorr=TRUE, dyeMethod=single")) +} From c3f1dca5e2c420164e181e5048b3de59e5243961 Mon Sep 17 00:00:00 2001 From: ttriche Date: Thu, 20 Apr 2017 12:51:30 -0700 Subject: [PATCH 10/12] add documentation for noobPipeline and default to TCGA settings (p>0.05) --- R/noobPipeline.R | 2 +- man/noobPipeline.Rd | 45 +++++++++++++++++++++++++++++++++++++++++++ man/preprocessNoob.Rd | 2 +- 3 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 man/noobPipeline.Rd diff --git a/R/noobPipeline.R b/R/noobPipeline.R index 7b28e83..bf5e357 100644 --- a/R/noobPipeline.R +++ b/R/noobPipeline.R @@ -1,5 +1,5 @@ # a "best practices" baseline for most studies -noobPipeline <- function(rgSet, pCutoff=0.01) { +noobPipeline <- function(rgSet, pCutoff=0.05) { pval <- detectionP(rgSet) message("Running TCGA-style noob pipeline on ", ncol(rgSet), " samples...") grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) diff --git a/man/noobPipeline.Rd b/man/noobPipeline.Rd new file mode 100644 index 0000000..4553a55 --- /dev/null +++ b/man/noobPipeline.Rd @@ -0,0 +1,45 @@ +\name{noobPipeline} +\alias{noobPipeline} +\alias{noobPipeline} +\title{ + A TCGA-style preprocessing pipeline for Infinium methylation microarrays. +} +\description{ + The standard processing pipeline for the Cancer Genome Atlas (TCGA) was + the methylumi implementation of preprocessNoob (which returns identical beta/M + values to the ssNoob implementation now standard in minfi). Probes with + detection p-values > 0.05 were masked as NA, SNP probes for label swap QC + tabulated, and the results uploaded as Level 2 data after certain masking + steps (related to common SNPs and repetitive regions) had been applied. + This function wraps the individual steps to turn an \code{RGChannelSet} into + a \code{GenomicRatioSet} directly comparable to canonical TCGA datasets. +} +\usage{ +noobPipeline(rgSet, pCutoff=0.05) +} +\arguments{ + \item{rgSet}{An object of class \code{RGChannelSet}.} + \item{pCutoff}{Above what detection pvalue should probes be marked NA? (0.05)} +} +\value{ + An object of class \code{GenomicRatioSet} with additional SNP metadata. +} +\references{ + TJ Triche, DJ Weisenberger, D Van Den Berg, PW Laird and KD Siegmund + \emph{Low-level processing of Illumina Infinium DNA Methylation + BeadArrays}. + Nucleic Acids Res (2013) 41, e90. + doi:\href{http://www.dx.doi.org/10.1093/nar/gkt090}{10.1093/nar/gkt090}. +} +\author{ + Tim Triche, Jr. +} +\seealso{ + \code{\linkS4class{RGChannelSet}} +} +\examples{ +if (require(minfiData)) { + grSetEx <- noobPipeline(RGsetEx) + identical(getSNPs(grSetEx), getSNPs(RGsetEx)) +} +} diff --git a/man/preprocessNoob.Rd b/man/preprocessNoob.Rd index d3d76b2..b9f3a82 100644 --- a/man/preprocessNoob.Rd +++ b/man/preprocessNoob.Rd @@ -1,6 +1,5 @@ \name{preprocessNoob} \alias{preprocessNoob} -\alias{noobPipeline} \title{ The Noob/ssNoob preprocessing method for Infinium methylation microarrays. } @@ -41,6 +40,7 @@ noobPipeline(rgSet, pCutoff = 0.01) as well as \code{\linkS4class{IlluminaMethylationManifest}} for the basic classes involved in these functions. \code{\link{preprocessRaw}} and \code{\link{preprocessQuantile}} are other preprocessing functions. + \code{\link{noobPipeline}} for an end-to-end processing pipeline with results directly comparable to the Cancer Genome Atlas (TCGA) DNA methylation data. } \examples{ if (require(minfiData)) { From 8b0a033b8d5bbeec5808c7f5b31de059517bec31 Mon Sep 17 00:00:00 2001 From: ttriche Date: Wed, 10 May 2017 15:10:23 -0700 Subject: [PATCH 11/12] tidying up per Kasper; hello, tcgaPipeline() --- DESCRIPTION | 4 +-- NAMESPACE | 6 ++-- R/generics.R | 2 +- R/grset.R | 2 +- R/rgset.R | 49 +++++++++++++------------ R/{noobPipeline.R => tcgaPipeline.R} | 8 ++--- inst/unitTests/test_noobPipeline.R | 7 ---- inst/unitTests/test_tcgaPipeline.R | 7 ++++ man/noobPipeline.Rd | 45 ----------------------- man/tcgaPipeline.Rd | 53 ++++++++++++++++++++++++++++ 10 files changed, 94 insertions(+), 89 deletions(-) rename R/{noobPipeline.R => tcgaPipeline.R} (55%) delete mode 100644 inst/unitTests/test_noobPipeline.R create mode 100644 inst/unitTests/test_tcgaPipeline.R delete mode 100644 man/noobPipeline.Rd create mode 100644 man/tcgaPipeline.Rd diff --git a/DESCRIPTION b/DESCRIPTION index cb81a6f..7e3390b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: minfi -Version: 1.23.2 +Version: 1.23.3 Title: Analyze Illumina Infinium DNA methylation arrays Description: Tools to analyze & visualize Illumina Infinium methylation arrays. Authors@R: c(person(c("Kasper", "Daniel"), "Hansen", role = c("cre", "aut"), @@ -74,7 +74,7 @@ Collate: generics.R preprocessQuantile.R preprocessNoob.R preprocessFunnorm.R - noobPipeline.R + tcgaPipeline.R qc.R read.450k.R read.meth.R diff --git a/NAMESPACE b/NAMESPACE index d53e137..9467074 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -48,7 +48,7 @@ export(getManifest, getProbeInfo, getManifestInfo, IlluminaMethylationManifest, IlluminaMethylationAnnotation, getControlAddress, getRed, getGreen, getNBeads, getMeth, getUnmeth, - getBeta, getM, getCN, getSNPs, getMethSignal, + getBeta, getM, getCN, getSnpBeta, getMethSignal, getOOB, getSnpBeta, dropMethylationLoci, getLocations, getAnnotation, getAnnotationObject, @@ -57,7 +57,7 @@ export(getManifest, getProbeInfo, getManifestInfo, normalize.illumina.control, bgcorrect.illumina, preprocessRaw, preprocessIllumina, preprocessSWAN, preprocessQuantile, - preprocessFunnorm, preprocessNoob, noobPipeline, + preprocessFunnorm, preprocessNoob, tcgaPipeline, RGChannelSet, RGChannelSetExtended, MethylSet, RatioSet, GenomicMethylSet, GenomicRatioSet, @@ -81,7 +81,7 @@ exportClasses(RGChannelSet, RGChannelSetExtended, IlluminaMethylationManifest, IlluminaMethylationAnnotation) exportMethods(show, initialize, - getBeta, getM, getCN, getSNPs, getMeth, getUnmeth, getManifest, + getBeta, getM, getCN, getSnpBeta, getMeth, getUnmeth, getManifest, annotation, "annotation<-", preprocessMethod, combine, mapToGenome, ratioConvert, bumphunter, pData, "pData<-", sampleNames, "sampleNames<-", diff --git a/R/generics.R b/R/generics.R index f1004d5..611457e 100644 --- a/R/generics.R +++ b/R/generics.R @@ -8,6 +8,6 @@ setGeneric("getManifest", function(object) standardGeneric("getManifest")) ## setGeneric("getLocations", function(object, ...) standardGeneric("getLocations")) setGeneric("preprocessMethod", function(object) standardGeneric("preprocessMethod")) setGeneric("getCN", function(object, ...) standardGeneric("getCN")) -setGeneric("getSNPs", function(object) standardGeneric("getSNPs")) +setGeneric("getSnpBeta", function(object) standardGeneric("getSnpBeta")) setGeneric("convertArray", function(object, ...) standardGeneric("convertArray")) setGeneric("combineArrays", function(object1, object2, ...) standardGeneric("combineArrays")) diff --git a/R/grset.R b/R/grset.R index a3e2a4d..c297654 100644 --- a/R/grset.R +++ b/R/grset.R @@ -71,7 +71,7 @@ setMethod("getCN", signature(object = "GenomicRatioSet"), }) # FIXME: enforce something a bit less lackadaisical? -setMethod("getSNPs", signature(object = "GenomicRatioSet"), +setMethod("getSnpBeta", signature(object = "GenomicRatioSet"), function (object) { if (!all(colnames(object) %in% colnames(metadata(object)$SNPs))) { missed <- setdiff(colnames(object), diff --git a/R/rgset.R b/R/rgset.R index 81acd1a..6be1e65 100644 --- a/R/rgset.R +++ b/R/rgset.R @@ -112,27 +112,32 @@ getNBeads <- function(object) { assay(object, "NBeads") } +setMethod("getSnpBeta", signature(object = "RGChannelSet"), + function (object) { -getSnpBeta <- function(object){ - .isRGOrStop(object) + .isRGOrStop(object) - snpProbesII <- getProbeInfo(object, type = "SnpII")$Name - M.II <- getGreen(object)[getProbeInfo(object, type = "SnpII")$AddressA,,drop=FALSE] - U.II <- getRed(object)[getProbeInfo(object, type = "SnpII")$AddressA,,drop=FALSE] - - snpProbesI.Green <- getProbeInfo(object, type = "SnpI")[getProbeInfo(object, type = "SnpI")$Color=="Grn",,drop=FALSE] - snpProbesI.Red <- getProbeInfo(object, type = "SnpI")[getProbeInfo(object, type = "SnpI")$Color=="Red",,drop=FALSE] - M.I.Red <- getRed(object)[snpProbesI.Red$AddressB,,drop=FALSE] - U.I.Red <- getRed(object)[snpProbesI.Red$AddressA,,drop=FALSE] - M.I.Green <- getGreen(object)[snpProbesI.Green$AddressB,,drop=FALSE] - U.I.Green <- getGreen(object)[snpProbesI.Green$AddressA,,drop=FALSE] - - M <- rbind(M.II, M.I.Red, M.I.Green) - U <- rbind(U.II, U.I.Red, U.I.Green) - rownames(M) <- rownames(U) <- c(snpProbesII, snpProbesI.Red$Name, snpProbesI.Green$Name) - beta <- M/(U+M+100) - return(beta) -} + snpInfoII <- getProbeInfo(object, type="SnpII") + snpProbesII <- snpInfoII$Name + M.II <- getGreen(object)[snpInfoII$AddressA, , drop=FALSE] + U.II <- getRed(object)[snpInfoII$AddressA, , drop=FALSE] + + snpInfoI <- getProbeInfo(object, type="SnpI") + snpProbesI.Green <- snpInfoI[snpInfoI$Color=="Grn", , drop=FALSE] + M.I.Green <- getGreen(object)[snpProbesI.Green$AddressB, , drop=F] + U.I.Green <- getGreen(object)[snpProbesI.Green$AddressA, , drop=F] + snpProbesI.Red <- snpInfoI[snpInfoI$Color=="Red", , drop=FALSE] + M.I.Red <- getRed(object)[snpProbesI.Red$AddressB, , drop=FALSE] + U.I.Red <- getRed(object)[snpProbesI.Red$AddressA, , drop=FALSE] + + M <- rbind(M.II, M.I.Red, M.I.Green) + U <- rbind(U.II, U.I.Red, U.I.Green) + rsID <- c(snpProbesII, snpProbesI.Red$Name, snpProbesI.Green$Name) + rownames(M) <- rownames(U) <- rsID + beta <- M/(U+M+100) # I hate this. + return(beta) + + }) setMethod("getManifest", signature(object = "RGChannelSet"), function(object) { @@ -148,12 +153,6 @@ setMethod("getBeta", signature(object = "RGChannelSet"), callGeneric(object, ...) }) -# wrap getSnpBeta function, for RGChannelSet objects -setMethod("getSNPs", signature(object = "RGChannelSet"), - function (object) { - getSnpBeta(object) - }) - setMethod("mapToGenome", signature(object = "RGChannelSet"), function(object, ...) { object <- preprocessRaw(object) diff --git a/R/noobPipeline.R b/R/tcgaPipeline.R similarity index 55% rename from R/noobPipeline.R rename to R/tcgaPipeline.R index bf5e357..788926e 100644 --- a/R/noobPipeline.R +++ b/R/tcgaPipeline.R @@ -1,12 +1,10 @@ # a "best practices" baseline for most studies -noobPipeline <- function(rgSet, pCutoff=0.05) { +tcgaPipeline <- function(rgSet, pCutoff=0.05) { pval <- detectionP(rgSet) - message("Running TCGA-style noob pipeline on ", ncol(rgSet), " samples...") - grSet <- mapToGenome(ratioConvert(preprocessNoob(rgSet))) + message("Running TCGA-style (noob) pipeline on ", ncol(rgSet), " samples...") + grSet <- ratioConvert(mapToGenome(preprocessNoob(rgSet))) message("Masking probes with detection p-value > ", pCutoff, "...") is.na(assays(grSet)$Beta) <- (pval[rownames(grSet),] >= pCutoff) - message("Dropping redundant \"M\" assay matrix...") - assays(grSet) <- assays(grSet)[c("Beta","CN")] message("Placing SNP probe betas in metadata(grSet)$SNPs...") metadata(grSet)$SNPs <- getSnpBeta(rgSet) message("Done.") diff --git a/inst/unitTests/test_noobPipeline.R b/inst/unitTests/test_noobPipeline.R deleted file mode 100644 index 5f18441..0000000 --- a/inst/unitTests/test_noobPipeline.R +++ /dev/null @@ -1,7 +0,0 @@ -test_noobPipeline <- function() { - stopifnot(require(minfiData)) - grSetEx <- noobPipeline(updateObject(RGsetEx)) - checkIdentical(getSNPs(grSetEx), getSNPs(updateObject(RGsetEx))) - checkIdentical(preprocessMethod(grSetEx), - c(mu.norm="Noob, dyeCorr=TRUE, dyeMethod=single")) -} diff --git a/inst/unitTests/test_tcgaPipeline.R b/inst/unitTests/test_tcgaPipeline.R new file mode 100644 index 0000000..d789d11 --- /dev/null +++ b/inst/unitTests/test_tcgaPipeline.R @@ -0,0 +1,7 @@ +test_tcgaPipeline <- function() { + stopifnot(require(minfiData)) + grSetEx <- tcgaPipeline(updateObject(RGsetEx)) + checkIdentical(getSnpBeta(grSetEx), getSnpBeta(updateObject(RGsetEx))) + checkIdentical(preprocessMethod(grSetEx), + c(mu.norm="Noob, dyeCorr=TRUE, dyeMethod=single")) +} diff --git a/man/noobPipeline.Rd b/man/noobPipeline.Rd deleted file mode 100644 index 4553a55..0000000 --- a/man/noobPipeline.Rd +++ /dev/null @@ -1,45 +0,0 @@ -\name{noobPipeline} -\alias{noobPipeline} -\alias{noobPipeline} -\title{ - A TCGA-style preprocessing pipeline for Infinium methylation microarrays. -} -\description{ - The standard processing pipeline for the Cancer Genome Atlas (TCGA) was - the methylumi implementation of preprocessNoob (which returns identical beta/M - values to the ssNoob implementation now standard in minfi). Probes with - detection p-values > 0.05 were masked as NA, SNP probes for label swap QC - tabulated, and the results uploaded as Level 2 data after certain masking - steps (related to common SNPs and repetitive regions) had been applied. - This function wraps the individual steps to turn an \code{RGChannelSet} into - a \code{GenomicRatioSet} directly comparable to canonical TCGA datasets. -} -\usage{ -noobPipeline(rgSet, pCutoff=0.05) -} -\arguments{ - \item{rgSet}{An object of class \code{RGChannelSet}.} - \item{pCutoff}{Above what detection pvalue should probes be marked NA? (0.05)} -} -\value{ - An object of class \code{GenomicRatioSet} with additional SNP metadata. -} -\references{ - TJ Triche, DJ Weisenberger, D Van Den Berg, PW Laird and KD Siegmund - \emph{Low-level processing of Illumina Infinium DNA Methylation - BeadArrays}. - Nucleic Acids Res (2013) 41, e90. - doi:\href{http://www.dx.doi.org/10.1093/nar/gkt090}{10.1093/nar/gkt090}. -} -\author{ - Tim Triche, Jr. -} -\seealso{ - \code{\linkS4class{RGChannelSet}} -} -\examples{ -if (require(minfiData)) { - grSetEx <- noobPipeline(RGsetEx) - identical(getSNPs(grSetEx), getSNPs(RGsetEx)) -} -} diff --git a/man/tcgaPipeline.Rd b/man/tcgaPipeline.Rd new file mode 100644 index 0000000..64a6bcb --- /dev/null +++ b/man/tcgaPipeline.Rd @@ -0,0 +1,53 @@ +\name{tcgaPipeline} +\alias{tcgaPipeline} +\alias{tcgaPipeline} +\title{ + A TCGA-style preprocessing pipeline for Infinium methylation microarrays. +} +\description{ + The standard DNAme processing pipeline for the Cancer Genome Atlas (TCGA) was + the methylumi implementation of preprocessNoob (which yields identical beta/M + values to the single-sample ssNoob implementation now standard in minfi, in + the common case where a balanced reference sample was identified in each TCGA + batch). Probes with detection p-values > 0.05 were masked as NA, and SNP (rs) + probes included on each array by Illumina for label swap detection tabulated. + After QC and certain masking steps (related to common SNPs and repetitive + regions) had been applied, the results were uploaded as Level 2 data for TCGA. + This function wraps the individual steps to turn an \code{RGChannelSet} into + a \code{GenomicRatioSet} directly comparable to canonical TCGA datasets, save + for the SNP/repeat masking steps (which, in hindsight, seem overzealous). The + resulting GenomicRatioSet is thus directly comparable to processed TCGA data, + and as of this writing (May 2017), no single preprocessing method (let alone + any single-sample preprocessing method) has emerged as superior for the + general case of mixed tumor and normal samples from varying tissues. Thus, + this approach may also serve as a useful benchmark for methods development. +} +\usage{ +tcgaPipeline(rgSet, pCutoff=0.05) +} +\arguments{ + \item{rgSet}{An object of class \code{RGChannelSet}.} + \item{pCutoff}{Above what detection pvalue should probes be marked NA? (0.05)} +} +\value{ + An object of class \code{GenomicRatioSet} with additional SNP metadata. +} +\references{ + TJ Triche, DJ Weisenberger, D Van Den Berg, PW Laird and KD Siegmund + \emph{Low-level processing of Illumina Infinium DNA Methylation + BeadArrays}. + Nucleic Acids Res (2013) 41, e90. + doi:\href{http://www.dx.doi.org/10.1093/nar/gkt090}{10.1093/nar/gkt090}. +} +\author{ + Tim Triche, Jr. +} +\seealso{ + \code{\linkS4class{RGChannelSet}} +} +\examples{ +if (require(minfiData)) { + grSetEx <- tcgaPipeline(RGsetEx) + stopifnot(identical(getSnpBeta(grSetEx), getSnpBeta(RGsetEx))) +} +} From 2ddbdab2053c53c2b25421e07310e214ce156a0c Mon Sep 17 00:00:00 2001 From: ttriche Date: Wed, 10 May 2017 15:25:03 -0700 Subject: [PATCH 12/12] update manpage for preprocessNoob to reference tcgaPipeline --- man/preprocessNoob.Rd | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/man/preprocessNoob.Rd b/man/preprocessNoob.Rd index b9f3a82..efdab02 100644 --- a/man/preprocessNoob.Rd +++ b/man/preprocessNoob.Rd @@ -10,8 +10,6 @@ \usage{ preprocessNoob(rgSet, offset = 15, dyeCorr = TRUE, verbose = TRUE, dyeMethod=c("single", "reference")) - -noobPipeline(rgSet, pCutoff = 0.01) } \arguments{ \item{rgSet}{An object of class \code{RGChannelSet}.} @@ -19,11 +17,9 @@ noobPipeline(rgSet, pCutoff = 0.01) \item{dyeCorr}{Should dye correction be done?} \item{verbose}{Should the function be verbose?} \item{dyeMethod}{How should dye bias correction be done: use a single sample approach (ssNoob), or a reference array?} - \item{pCutoff}{(for noobPipeline) Above what detection pvalue should probes be marked NA? (0.01)} } \value{ For preprocessNoob, an object of class \code{MethylSet}. - For noobPipeline, an object of class \code{GenomicRatioSet} with SNP metadata. } \references{ TJ Triche, DJ Weisenberger, D Van Den Berg, PW Laird and KD Siegmund @@ -40,14 +36,14 @@ noobPipeline(rgSet, pCutoff = 0.01) as well as \code{\linkS4class{IlluminaMethylationManifest}} for the basic classes involved in these functions. \code{\link{preprocessRaw}} and \code{\link{preprocessQuantile}} are other preprocessing functions. - \code{\link{noobPipeline}} for an end-to-end processing pipeline with results directly comparable to the Cancer Genome Atlas (TCGA) DNA methylation data. + \code{\link{tcgaPipeline}} for an end-to-end processing pipeline with results directly comparable to the Cancer Genome Atlas (TCGA) DNA methylation data. } \examples{ if (require(minfiData)) { ## RGsetEx.sub is a small subset of RGsetEx; ## only used for computational speed. MsetEx.sub.noob <- preprocessNoob(RGsetEx.sub) - grSet.sub.noob <- noobPipeline(RGsetEx.sub) + grSet.sub.noob <- tcgaPipeline(RGsetEx.sub) } \dontrun{ if (require(minfiData)) {