Skip to content

Commit

Permalink
Merge pull request #10 from p2m2/develop
Browse files Browse the repository at this point in the history
0.2
  • Loading branch information
ofilangi authored Oct 6, 2022
2 parents 9c0fceb + 42478fe commit 5e1abb2
Show file tree
Hide file tree
Showing 17 changed files with 436 additions and 128 deletions.
1 change: 1 addition & 0 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ libraryDependencies ++= Seq(
"com.lihaoyi" %% "ujson" % "2.0.0",
"org.scala-lang.modules" %% "scala-parser-combinators" % "2.1.1",
"com.lihaoyi" %% "utest" % "0.8.1" % Test,
"org.slf4j" % "slf4j-simple" % "2.0.3" % Test,
)

credentials += {
Expand Down
27 changes: 19 additions & 8 deletions src/main/resources/default.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,31 @@
"Glucosinolate" : {
"deltaMp0Mp2" : 1.996,
"numberSulfurMin" : 2,
"neutralLoss" :
{
"gluconolactone" : 178.0,
"sulfureTrioxide" : 80.0,
"anhydroglucose" : 162.0,
"minMzCoreStructure" : 317.995896,
"neutralLoss" : {
"thioglucose_s03" : 242.0,
"glucosinolate_223" : 223.0,
"thioglucose" : 196.0,
"glucosinolate_223" : 223.0
"gluconolactone" : 178.0,
"RCNO4S2-" : 163.0,
"anhydroglucose" : 162.0,
"sulfureTrioxide" : 80.0
},
"daughterIons" : {
"C6H11O9S_259" : 259.0,
"C6H11O8S2_275" : 275.0,
"C6H11O9S_259" : 259.0,
"C6H10O8S-_241" : 242.0,
"C6H9NO8S_241" : 241.0,
"C6H11O7S_227" : 227.0
"C6H11O7S-_227" : 227.0,
"C6H11O5S-_195" : 195.03,
"C6H11O2-_153" : 163.0,
"C2H4O5NS-_153" : 153.98,
"C2H3O5S-_138" : 138.97,
"C2H2O4S-_135" : 135.97,
"HO4S2-_128" : 128.93,
"HSO4-_97" : 96.95,
"SO4-_95" : 96.0,
"C2H3OS-" : 74.99
},
"databaseReference" : {
"G01" : 406.029,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package fr.inrae.metabolomics.p2m2

import fr.inrae.metabolomics.p2m2.`export`.CsvMetabolitesIdentificationFile
import fr.inrae.metabolomics.p2m2.builder.{MetaboliteIdentification, PeakIdentification, ScanLoader}
import fr.inrae.metabolomics.p2m2.config.ConfigReader
import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification
import fr.inrae.metabolomics.p2m2.output.MetabolitesIdentification
import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex}

import java.io.File
Expand All @@ -12,16 +14,16 @@ object Main extends App {
import scopt.OParser

case class Config(
mzfiles : Seq[File] = Seq(),
jsonFamilyMetabolitesDetection : Option[File] = None,
thresholdIntensityFilter : Option[Int] = None,
thresholdAbundanceM0Filter : Double = 0.1,
overrepresentedPeakFilter : Int = 800,
startRT : Option[Double] = None,
endRT : Option[Double] = None,
precisionMzh : Int = 1000,
toleranceMz : Double = 0.01,
outfile : Option[File] = None,
mzfiles: Seq[File] = Seq(),
jsonFamilyMetabolitesDetection: Option[File] = None,
thresholdIntensityFilter: Option[Int] = None,
thresholdAbundanceM0Filter: Double = 0.1,
overrepresentedPeakFilter: Int = 800,
startRT: Option[Double] = None,
endRT: Option[Double] = None,
precisionMzh: Int = 1000,
toleranceMz: Double = 0.01,
outfile: Option[File] = None,
verbose: Boolean = false,
debug: Boolean = false
)
Expand All @@ -36,11 +38,11 @@ object Main extends App {
.optional()
.action((x, c) => c.copy(jsonFamilyMetabolitesDetection = Some(x)))
.text(s"json configuration to detect metabolite family."),
opt[Int]('i',"thresholdIntensityFilter")
opt[Int]('i', "thresholdIntensityFilter")
.optional()
.action((x, c) => c.copy(thresholdIntensityFilter = Some(x)))
.text(s"Keep ions above a x intensity (calculation on start-up time)"),
opt[Int]('p',"overrepresentedPeakFilter")
opt[Int]('p', "overrepresentedPeakFilter")
.optional()
.action((x, c) => c.copy(overrepresentedPeakFilter = x))
.text(s"filter about over represented peaks. default ${Config().overrepresentedPeakFilter}"),
Expand All @@ -56,11 +58,11 @@ object Main extends App {
.optional()
.action((x, c) => c.copy(precisionMzh = x))
.text(s"precision/rounded Mzh (number to the right of the decimal point) . ${Config().precisionMzh}"),
opt[Double]('t',"toleranceMz")
opt[Double]('t', "toleranceMz")
.optional()
.action((x, c) => c.copy(toleranceMz = x))
.text(s"tolerance accepted. ${Config().toleranceMz}"),
opt[File]('o',"outputFile")
opt[File]('o', "outputFile")
.optional()
.action((x, c) => c.copy(outfile = Some(x)))
.text(s"output path file."),
Expand Down Expand Up @@ -91,12 +93,12 @@ object Main extends App {
// arguments are bad, error message will have been displayed
}

def process(config : Config): Unit = {
def process(config: Config): Unit = {

val confJson = config.jsonFamilyMetabolitesDetection match {
case Some(jsonFilePath) =>
val s = Source.fromFile (jsonFilePath)
val res = ConfigReader.read(s.getLines ().mkString)
val s = Source.fromFile(jsonFilePath)
val res = ConfigReader.read(s.getLines().mkString)
s.close()
res
case None =>
Expand Down Expand Up @@ -124,57 +126,69 @@ object Main extends App {
intensityFilter,
confJson.deltaMp0Mp2(family),
confJson.numberSulfurMin(family),
confJson.minMzCoreStructure(family),
confJson.neutralLoss(family),
confJson.daughterIons(family)
)
}
val f = config.outfile.getOrElse(new File( s"$family.csv"))
val f = config.outfile.getOrElse(new File(s"$family.csv"))
f.delete()
CsvMetabolitesIdentificationFile.build(values,family,confJson,f)
CsvMetabolitesIdentificationFile.build(values, family, confJson, f)
println(s"========= check ${f.getPath} ===============")
})
}
}


def analyse_metabolite(
config: Config,
source: MZXMLFile,
index: MZXMLIndex,
intensityFilter: Int,
deltaMp0Mp2: Double,
numberSulfurMin: Double,
neutralLoss: Map[String, Double],
daughterIons: Map[String, Double]
): Seq[CsvMetabolitesIdentification] = {

val listSulfurMetabolites: Seq[PeakIdentification] =
ScanLoader.
getScanIdxAndSpectrumM0M2WithDelta(
source,
index,
config.startRT,
config.endRT,
config.thresholdAbundanceM0Filter,
intensityFilter,
filteringOnNbSulfur = numberSulfurMin.toInt,
config.toleranceMz,
deltaMOM2 = deltaMp0Mp2)

val listSulfurMetabolitesSelected: Seq[PeakIdentification] = //listSulfurMetabolites
ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh)

val m: MetaboliteIdentification =
ScanLoader.filterOverRepresentedPeak(
def analyse_metabolite(
config: Config,
source: MZXMLFile,
index: MZXMLIndex,
intensityFilter: Int,
deltaMp0Mp2: Double,
numberSulfurMin: Double,
mzCoreStructure : Double,
neutralLoss: Map[String, Double],
daughterIons: Map[String, Double]
): Seq[MetabolitesIdentification] = {

val listSulfurMetabolites: Seq[PeakIdentification] =
ScanLoader.
getScanIdxAndSpectrumM0M2WithDelta(
source,
index,
config.startRT,
config.endRT,
listSulfurMetabolitesSelected,
config.thresholdAbundanceM0Filter,
intensityFilter,
config.overrepresentedPeakFilter,
neutralLoss.toSeq,
daughterIons.toSeq
)
m.getInfos(config.precisionMzh)
}
}
filteringOnNbSulfur = numberSulfurMin.toInt,
config.toleranceMz,
deltaMOM2 = deltaMp0Mp2)

/* Diagnostics : Ions frequency on selected Scan peak detected ! */

val frequencyOfMz: Seq[(Int, Int)] = Seq() // DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,index,listSulfurMetabolites)
println(frequencyOfMz)
/* Warning: this is slow... maybe it should be made optional. */
println("\n\n\n============== Twenty Ions frequency on selected Scan peak detected =========================")
println(frequencyOfMz.reverse.slice(1, 20).map {
case (mz, freq) => (mz.toString + " m/z -> " + freq)
}.mkString(" , "))

val listSulfurMetabolitesSelected: Seq[PeakIdentification] = // listSulfurMetabolites
ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh)

val m: MetaboliteIdentification =
ScanLoader.filterOverRepresentedPeak(
source,
index,
config.startRT,
config.endRT,
listSulfurMetabolitesSelected,
intensityFilter,
config.overrepresentedPeakFilter,
neutralLoss.toSeq,
daughterIons.toSeq
)
m.findDiagnosticIonsAndNeutralLosses(config.precisionMzh,mzCoreStructure)
}
}
3 changes: 0 additions & 3 deletions src/main/scala/fr/inrae/metabolomics/p2m2/analyzer/Peak.scala

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package fr.inrae.metabolomics.p2m2.builder

import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification
import fr.inrae.metabolomics.p2m2.output.MetabolitesIdentification
import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex}

case class MetaboliteIdentification(
Expand All @@ -12,32 +12,48 @@ case class MetaboliteIdentification(
nls : Seq[(String,Double)],
dis : Seq[(String,Double)]
) {
def getInfo( p :PeakIdentification,precisionMzh : Int) : CsvMetabolitesIdentification = {
val mz = p.peaks.map(p2 => (p2.mz*precisionMzh ).round / precisionMzh.toDouble )
val intensities = p.peaks.map(_.intensity)
val abundance = p.peaks.map(_.abundance)
def getInfo( p :PeakIdentification,precisionMzh : Int, mzCoreStructure : Double) : Option[MetabolitesIdentification] = p.peaks.nonEmpty match {
case true =>
val mz = p.peaks.map(p2 => (p2.mz*precisionMzh ).round / precisionMzh.toDouble )
val intensities = p.peaks.map(_.intensity)
val abundance = p.peaks.map(_.abundance)

CsvMetabolitesIdentification(
mz,
intensities,
abundance,
p.rt,
neutralLosses = ScanLoader.detectNeutralLoss(source,index,start,end,p,nls),
daughterIons = ScanLoader.detectDaughterIons(source,index,start,end,p,dis)
)
if ( p.peaks.head.mz >= mzCoreStructure )
Some(MetabolitesIdentification(
mz,
intensities,
abundance,
p.rt,
neutralLosses = ScanLoader.detectNeutralLoss(source,index,start,end,p,nls),
daughterIons = ScanLoader.detectDaughterIons(source,index,start,end,p,dis)
))
else
Some(MetabolitesIdentification(
mz,
intensities,
abundance,
p.rt,
neutralLosses = Map(),
daughterIons = Map()
))
case false => None
}

def getInfos(precisionMzh : Int): Seq[CsvMetabolitesIdentification] = {
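/**
* Builds one identification entry per peak (via getInfo), keeps only the entries whose
* neutralLosses and daughterIons both contain at least one non-empty value, and sorts
* the result by retention time, then by first m/z. Progress is printed to standard output.
*
* @param precisionMzh rounding factor for m/z values (mz * precisionMzh, rounded, then divided by precisionMzh)
* @param mzCoreStructure minimum m/z of the core structure for the metabolite family; neutral losses
*                        and daughter ions are only searched for peaks whose first m/z is at least this value
* @return the retained entries, sorted by (rt, first m/z)
*/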
def findDiagnosticIonsAndNeutralLosses(precisionMzh : Int, mzCoreStructure : Double): Seq[MetabolitesIdentification] = {
println("\n== detectNeutralLoss/detectDaughterIons == ")

peaks.zipWithIndex
. map {
. flatMap {
case (x,idx) =>
print(s"\r===>$idx/${peaks.size}")
getInfo(x,precisionMzh)
getInfo(x,precisionMzh,mzCoreStructure)
}
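/* keep only entries with at least one neutral loss AND at least one daughter ion detected (an entry missing either one is removed); note: the big-abundance (>60%) criterion is not applied by this filter */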
.filter( csvM => (csvM.neutralLosses.values.flatten.nonEmpty && csvM.daughterIons.values.flatten.nonEmpty) )
.sortBy( x => (x.rt,x.mz.head) )
}
}
Loading

0 comments on commit 5e1abb2

Please sign in to comment.