Skip to content

Commit

Permalink
Merge pull request #22 from p2m2/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
ofilangi authored Oct 26, 2022
2 parents cf97244 + 72d108f commit 2113ca2
Show file tree
Hide file tree
Showing 21 changed files with 448 additions and 407 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,10 @@

Detection of Ions/Metabolites for Glucosinolates and Phenolics (organism Brassica napus).


[Identification and Quantification of Glucosinolates and Phenolics in a Large Panel of Brassica napus Highlight Valuable Genetic Resources for Chemical Ecology and Breeding](https://pubs.acs.org/doi/10.1021/acs.jafc.1c08118)

## Installation


### assembly

```bash
Expand Down
6 changes: 3 additions & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ publishMavenStyle := true
// Coverage

coverageMinimumStmtTotal := 70
coverageMinimumBranchTotal := 20
coverageMinimumBranchTotal := 30
coverageMinimumStmtPerPackage := 70
coverageMinimumBranchPerPackage := 20
coverageMinimumBranchPerPackage := 30
coverageMinimumStmtPerFile := 70
coverageMinimumBranchPerFile := 20
coverageMinimumBranchPerFile := 30
coverageFailOnMinimum := true
coverageHighlighting := true

Expand Down
7 changes: 4 additions & 3 deletions src/main/resources/default.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
{
"Glucosinolate" : {
"deltaMp0Mp2" : 1.996,
"numberSulfurMin" : 1,
"minAbundanceM1" : 0.09,
"maxAbundanceM1" : 0.4,
"numberCarbonMin" : 3,
"numberCarbonMax" : 35,
"numberSulfurMin" : 1.5,
"numberSulfurMax" : 5,
"minMzCoreStructure" : 317.995896,
"neutralLoss" : {
"thioglucose_s03" : 242.0,
Expand Down
18 changes: 2 additions & 16 deletions src/main/scala/fr/inrae/metabolomics/p2m2/MainClustering.scala
Original file line number Diff line number Diff line change
Expand Up @@ -65,32 +65,18 @@ object MainClustering extends App {
val v = ScanLoader.read(new File(ii.pathFile))
val r2 = ScanLoader.getDeltaNeutralLossesFromPeak(v._1, v._2, ii.ion, 100)

(idxF,mz, rt,r2)
(idxF,mz, rt,r2.sorted)
}
)
}
}.distinct

res.foreach(
row =>
println(f"${row._2}%4.2f \t ${row._3}%3.2f\t${row._1} \n\t${row._4}")
println(f"\n${row._2} \t ${row._3}%3.2f\t${row._1} \n\t${row._4}")

)

// println(r.head._2._1)

r.map( _._2._1.head) foreach {
case ion : IonsIdentification =>
val v = ScanLoader.read(new File(ion.pathFile))
val r2 = ScanLoader.getDeltaNeutralLossesFromPeak(v._1, v._2, ion.ion, 100)
println("=======R2=================",ion.ion)
println(r2)
println("========================")
}


//ScanLoader.getDeltaNeutralLossesFromPeak()

println("original size:"+r.map {
case (idxFile, v) => v._1.size
})
Expand Down
127 changes: 55 additions & 72 deletions src/main/scala/fr/inrae/metabolomics/p2m2/MainDetection.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ object MainDetection extends App {
import scopt.OParser

case class Config(
mzfiles: Seq[File] = Seq(),
mzFiles: Seq[File] = Seq(),
jsonFamilyMetabolitesDetection: Option[File] = None,
thresholdIntensityFilter: Option[Int] = None,
thresholdAbundanceM0Filter: Double = 0.1,
overrepresentedPeakFilter: Int = 800,
noiseIntensity: Option[Double] = None,
startRT: Option[Double] = None,
endRT: Option[Double] = None,
overrepresentedPeak: Int = 800,
precisionMzh: Int = 1000,
toleranceMz: Double = 0.01,
warmup: Double = 0.50, // (30 sec)
outfile: Option[File] = None,
verbose: Boolean = false,
debug: Boolean = false
Expand All @@ -41,14 +41,14 @@ object MainDetection extends App {
.optional()
.action((x, c) => c.copy(jsonFamilyMetabolitesDetection = Some(x)))
.text(s"json configuration to detect metabolite family."),
opt[Int]('i', "thresholdIntensityFilter")
opt[Double]('i', "noiseIntensity")
.optional()
.action((x, c) => c.copy(thresholdIntensityFilter = Some(x)))
.action((x, c) => c.copy(noiseIntensity = Some(x)))
.text(s"Keep ions above a x intensity (calculation on start-up time)"),
opt[Int]('p', "overrepresentedPeakFilter")
opt[Int]('p', "overrepresentedPeak")
.optional()
.action((x, c) => c.copy(overrepresentedPeakFilter = x))
.text(s"filter about over represented peaks. default ${Config().overrepresentedPeakFilter}"),
.action((x, c) => c.copy(overrepresentedPeak = x))
.text(s"filter about over represented peaks. default ${Config().overrepresentedPeak}"),
opt[Double]('s', "startRT")
.optional()
.action((x, c) => c.copy(startRT = Some(x)))
Expand All @@ -57,6 +57,10 @@ object MainDetection extends App {
.optional()
.action((x, c) => c.copy(endRT = Some(x)))
.text(s"start RT"),
opt[Double]('w', "warm-up time to compute noise")
.optional()
.action((x, c) => c.copy(warmup = x))
.text(s"warmup time"),
opt[Int]('m', "precisionMzh")
.optional()
.action((x, c) => c.copy(precisionMzh = x))
Expand All @@ -80,7 +84,7 @@ object MainDetection extends App {

arg[File]("<file>...")
.unbounded()
.action((x, c) => c.copy(mzfiles = c.mzfiles :+ x)),
.action((x, c) => c.copy(mzFiles = c.mzFiles :+ x)),
help("help").text("prints this usage text"),
note("some notes." + sys.props("line.separator")),
checkConfig(_ => success)
Expand Down Expand Up @@ -114,7 +118,7 @@ object MainDetection extends App {
confJson.metabolites.foreach(
family => {
val allSelectedIons : Seq[(Double, ParSeq[(IonsIdentification,String)])] =
config.mzfiles
config.mzFiles
.par
.flatMap {
mzFile =>
Expand All @@ -136,31 +140,17 @@ object MainDetection extends App {
case false =>

val (source, index) = ScanLoader.read(mzFile)
val noiseIntensity: Double =
ScanLoader.calculBackgroundNoisePeak(
source,
index,
startDurationTime = 0.20)

val intensityFilter = config.thresholdIntensityFilter match {
/**
* The noise intensity is either given by the user or computed over the warm-up time.
*/
val noiseIntensity : Double = config.noiseIntensity match {
case Some(v) => v
case None => noiseIntensity
case None => ScanLoader.calcBackgroundNoisePeak(source, index, startDurationTime = config.warmup)
}

val values = ionsDetection(
config,
source,
index,
intensityFilter,
confJson.deltaMp0Mp2(family),
confJson.numberSulfurMin(family),
confJson.minAbundanceM1(family),
confJson.maxAbundanceM1(family),
confJson.minMzCoreStructure(family),
confJson.neutralLoss(family),
confJson.daughterIons(family),
noiseIntensity
)
val values =
ionsDetection(family,config,confJson, source, index,noiseIntensity)

val f = config.outfile.getOrElse(new File(s"${baseName}.csv"))
f.delete()
Expand Down Expand Up @@ -190,61 +180,54 @@ object MainDetection extends App {


def ionsDetection(
config: Config,
source: MZXMLFile,
index: MZXMLIndex,
intensityFilter: Double,
deltaMp0Mp2: Double,
numberSulfurMin: Double,
minAbundanceM1: Double,
maxAbundanceM1: Double,
mzCoreStructure : Double,
neutralLoss: Map[String, Double],
daughterIons: Map[String, Double],
noiseIntensity : Double
): Seq[IonsIdentification] = {

family : String,
config: Config,
confJson : ConfigReader,
source: MZXMLFile,
index: MZXMLIndex,
noiseIntensity : Double): Seq[IonsIdentification] = {

/**
* Get peaks matching the criteria: DeltaM0M2, number of carbon min/max, number of sulfur min/max
*/
val listSulfurMetabolites: Seq[PeakIdentification] =
ScanLoader.
getScanIdxAndSpectrumM0M2WithDelta(
selectEligibleIons(
source,
index,
config.startRT,
config.endRT,
config.thresholdAbundanceM0Filter,
intensityFilter.toInt,
filteringOnNbSulfur = numberSulfurMin.toInt,
minAbundanceM1,
maxAbundanceM1,
noiseIntensity,
nbCarbonMin = confJson.numberCarbonMin(family),
nbCarbonMax = confJson.numberCarbonMax(family),
nbSulfurMin = confJson.numberSulfurMin(family),
nbSulfurMax = confJson.numberSulfurMax(family),
config.toleranceMz,
deltaMOM2 = deltaMp0Mp2)

/* Diagnostics : Ions frequency on selected Scan peak detected ! */
deltaMOM2 = confJson.deltaMp0Mp2(family))

val frequencyOfMz: Seq[(Int, Int)] = Seq() // DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,index,listSulfurMetabolites)
println(frequencyOfMz)
/* Warning: this is slow... maybe make it optional! */
println("\n\n\n============== Twenty Ions frequency on selected Scan peak detected =========================")
println(frequencyOfMz.reverse.slice(1, 20).map {
case (mz, freq) => (mz.toString + " m/z -> " + freq)
}.mkString(" , "))

val listSulfurMetabolitesSelected: Seq[PeakIdentification] = // listSulfurMetabolites
/**
* Merge all features (ion/RT) that look alike.
*/
val listSulfurMetabolitesSelected: Seq[PeakIdentification] =
ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh)

/**
* remove over represented peaks
*/
val m: IonsIdentificationBuilder =
ScanLoader.filterOverRepresentedPeak(
source,
index,
config.startRT,
config.endRT,
listSulfurMetabolitesSelected,
intensityFilter,
config.overrepresentedPeakFilter,
neutralLoss.toSeq,
daughterIons.toSeq,
noiseIntensity
noiseIntensity,
config.overrepresentedPeak,
confJson.neutralLoss(family).toSeq,
confJson.daughterIons(family).toSeq
)
m.findDiagnosticIonsAndNeutralLosses(config.precisionMzh,mzCoreStructure)

/**
* Find neutral losses and diagnostic ions
*/
m.findDiagnosticIonsAndNeutralLosses(0.1,confJson.minMzCoreStructure(family))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,21 @@ import java.io.File
case class IonsIdentificationBuilder(
source : MZXMLFile,
index : MZXMLIndex,
start: Option[Double],
end: Option[Double],
peaks : Seq[PeakIdentification],
nls : Seq[(String,Double)],
dis : Seq[(String,Double)],
noiseIntensity : Double = 0.0
) {
def getRelativePath(source : MZXMLFile) : String =
new File(source.getPath).getCanonicalPath.replace(new File(".").getCanonicalPath,".")
def getInfo( p :PeakIdentification,precisionMzh : Int, mzCoreStructure : Double) : Option[IonsIdentification] = p.peaks.nonEmpty match {
def getInfo( p :PeakIdentification,tolMzh : Double, mzCoreStructure : Double) : Option[IonsIdentification] = p.peaks.nonEmpty match {
case true =>
if ( p.peaks.head.mz >= mzCoreStructure )
Some(IonsIdentification(
getRelativePath(source),
p,
neutralLosses = ScanLoader.detectNeutralLoss(source,index,p,nls,noiseIntensity = noiseIntensity),
daughterIons = ScanLoader.detectDaughterIons(source,index,p,dis,noiseIntensity = noiseIntensity)
neutralLosses = ScanLoader.detectNeutralLoss(source,index,p,nls,tolMzh=tolMzh,noiseIntensity = noiseIntensity),
daughterIons = ScanLoader.detectDaughterIons(source,index,p,dis,tolMzh=tolMzh,noiseIntensity = noiseIntensity)
))
else
Some(IonsIdentification(
Expand All @@ -42,14 +40,14 @@ case class IonsIdentificationBuilder(
* @param mzCoreStructure minimum size of a metabolite according param family
* @return
*/
def findDiagnosticIonsAndNeutralLosses(precisionMzh : Int, mzCoreStructure : Double): Seq[IonsIdentification] = {
def findDiagnosticIonsAndNeutralLosses(tolMzh : Double, mzCoreStructure : Double): Seq[IonsIdentification] = {
println("\n== detectNeutralLoss/detectDaughterIons == ")

peaks.zipWithIndex
. flatMap {
case (x,idx) =>
print(s"\r===>$idx/${peaks.size}")
getInfo(x,precisionMzh,mzCoreStructure)
getInfo(x,tolMzh,mzCoreStructure)
}
.sortBy( x => (x.ion.rt,x.ion.peaks.head.mz) )
}
Expand Down
Loading

0 comments on commit 2113ca2

Please sign in to comment.