From b511f7d40e4b8a06a5213605ed954a94846cf660 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Fri, 23 Sep 2022 17:16:03 +0200 Subject: [PATCH 01/11] add diagnostics information about daughterrs ions --- src/main/scala/Main.scala | 10 +++++ .../metabolomics/p2m2/analyzer/Peak.scala | 3 -- .../p2m2/builder/ScanLoader.scala | 3 ++ .../p2m2/diagnostic/DaughterIonsDiag.scala | 39 ++++++++++++++++++ .../diagnostic/DaughterIonsDiagTest.scala | 41 +++++++++++++++++++ 5 files changed, 93 insertions(+), 3 deletions(-) delete mode 100644 src/main/scala/fr/inrae/metabolomics/p2m2/analyzer/Peak.scala create mode 100644 src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala create mode 100644 src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index 3c813cc..aeb412e 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -1,6 +1,7 @@ import fr.inrae.metabolomics.p2m2.`export`.CsvMetabolitesIdentificationFile import fr.inrae.metabolomics.p2m2.builder.{MetaboliteIdentification, PeakIdentification, ScanLoader} import fr.inrae.metabolomics.p2m2.config.ConfigReader +import fr.inrae.metabolomics.p2m2.diagnostic.DaughterIonsDiag import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} @@ -160,6 +161,15 @@ object Main extends App { config.toleranceMz, deltaMOM2 = deltaMp0Mp2) + /* Diagnostics : Ions frequency on selected Scan peak detected ! */ + val frequencyOfMz : Seq[(Int,Int)] = DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,listSulfurMetabolites) + + /* Attention c est lent..... peut etre a faire en option !!*/ + println("\n\n\n============== Twenty Ions frequency on selected Scan peak detected =========================") + println(frequencyOfMz.reverse.slice(1,20).map { + case (mz, freq) => (mz.toString + " m/z -> " + freq) + }.mkString(" , ")) + val listSulfurMetabolitesSelected: Seq[PeakIdentification] = //listSulfurMetabolites ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/analyzer/Peak.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/analyzer/Peak.scala deleted file mode 100644 index 99d7d63..0000000 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/analyzer/Peak.scala +++ /dev/null @@ -1,3 +0,0 @@ -package fr.inrae.metabolomics.p2m2.analyzer - -case class Peak() diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala index d17954d..cc36613 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala @@ -243,6 +243,7 @@ case object ScanLoader { // println(countAllPeak) /* calcul distribution of Peak number */ + /* println("\n=========================================================") println("== Number of Peak detected on MS1 scans by M/z selected ") println("=========================================================\n") @@ -252,6 +253,8 @@ case object ScanLoader { println("\n\n=========================================================") println(" -- The thirty most detected peaks selected --") println(countAllPeak.sorted(Ordering[Int].reverse).distinct.slice(0,30)) + + */ /* val u = countAllPeak.foldLeft(Map[Int,Int]()) { case (acc, c) if acc.contains(c) => acc + (c -> (acc(c)+1)) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala new file mode 100644 index 0000000..7d0f399 --- /dev/null +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala @@ -0,0 +1,39 @@ +package fr.inrae.metabolomics.p2m2.diagnostic + +import fr.inrae.metabolomics.p2m2.builder.PeakIdentification +import umich.ms.datatypes.scan.IScan +import umich.ms.fileio.filetypes.mzxml.MZXMLFile + +case object DaughterIonsDiag { + + def round(v : Double) = (v*1).round / 1.toDouble + + def getPeaksWithIntensitiesNoNull( + source: MZXMLFile, + p : PeakIdentification + ) : Seq[Int] = { + val scan : IScan = source.parseScan(p.numScan, true) + + (scan.getSpectrum().getMZs() zip scan.getSpectrum().getIntensities()).filter{ + case (_,y) => y>0 + } map { case (x,_) => round(x).toInt } + } + + /** + * Frequence des Ions dans les scans de la liste des peaks selectionnés avec un delta + * @param peaks + * @return + */ + def IonsFrequencyOnSelectedScanPeakDetected(source: MZXMLFile,peaks : Seq[PeakIdentification]) : Seq[(Int,Int)] = { + peaks.map( + p => DaughterIonsDiag.getPeaksWithIntensitiesNoNull(source, p) + ).foldLeft(Map[Int, Int]())( + (acc: Map[Int, Int], v: Seq[Int]) => { + v.map(p2 => acc.get(p2) match { + case Some(s) => (p2 -> (s + 1)) + case None => (p2 -> 1) + }).toMap + } + ).toSeq.sortBy(_._2) + } +} diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala new file mode 100644 index 0000000..c15bd66 --- /dev/null +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala @@ -0,0 +1,41 @@ +package fr.inrae.metabolomics.p2m2.diagnostic +import fr.inrae.metabolomics.p2m2.builder.ScanLoader +import utest.{TestSuite, Tests, test} + +import java.io.File + +object DaughterIonsDiagTest extends TestSuite { + val tests : Tests = Tests { + + test("test") { + val v = ScanLoader.read(new File(getClass.getResource("/20181018-037.mzXML").getPath)) + val v2 = { + (ScanLoader.getScanIdxAndSpectrumM0M2WithDelta( + v._1, + v._2, + None, // RT start + None, // RT end + thresholdAbundanceM0Filter = 0.1, intensityFilter = 0, + precision = 0.01, + deltaMOM2 = 1.996 + )) + } + println("=============================================n peaks=",v2.size) + /* + Frequence des Ions dans les scans de la liste des peak selectionné avec le delta + */ + + v2.map( + p => DaughterIonsDiag.getPeaksWithIntensitiesNoNull(v._1,p) + ).foldLeft(Map[Int,Int]())( + (acc : Map[Int,Int],v : Seq[Int]) => { + v.map( p2 => acc.get(p2) match { + case Some(s) => (p2 -> (s + 1)) + case None => (p2 -> 1) + }).toMap + } + ).toSeq.sortBy(_._2) + } + + } +} \ No newline at end of file From dfdf181cf18ea2a87132f8cdc014d468acefcf77 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Fri, 30 Sep 2022 15:04:14 +0200 Subject: [PATCH 02/11] search on MS2 with a windows rt --- src/main/scala/Main.scala | 4 +-- .../p2m2/builder/ScanLoader.scala | 31 +++++++++---------- .../p2m2/diagnostic/DaughterIonsDiag.scala | 31 ++++++++++++++----- .../diagnostic/DaughterIonsDiagTest.scala | 2 +- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index aeb412e..e72570c 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -1,7 +1,6 @@ import fr.inrae.metabolomics.p2m2.`export`.CsvMetabolitesIdentificationFile import fr.inrae.metabolomics.p2m2.builder.{MetaboliteIdentification, PeakIdentification, ScanLoader} import fr.inrae.metabolomics.p2m2.config.ConfigReader -import fr.inrae.metabolomics.p2m2.diagnostic.DaughterIonsDiag import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} @@ -162,8 +161,9 @@ object Main extends App { deltaMOM2 = deltaMp0Mp2) /* Diagnostics : Ions frequency on selected Scan peak detected ! */ - val frequencyOfMz : Seq[(Int,Int)] = DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,listSulfurMetabolites) + val frequencyOfMz : Seq[(Int,Int)] = Seq() // DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,index,listSulfurMetabolites) + println(frequencyOfMz) /* Attention c est lent..... peut etre a faire en option !!*/ println("\n\n\n============== Twenty Ions frequency on selected Scan peak detected =========================") println(frequencyOfMz.reverse.slice(1,20).map { diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala index cc36613..74bf4db 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala @@ -310,15 +310,15 @@ case object ScanLoader { p : PeakIdentification, nls : Seq[(String,Double)], /* name, distance */ precisionPeakDetection: Double = 0.9, - precisionRtTime : Double = 0.001 + precisionRtTime : Double = 0.02 ) : Map[String,Option[Double]] = { -/* - val scanMs2: Seq[IScan] = scansMs(source, index,start,end, 2) - .filter(scanMs2 => { - scanMs2.getRt == p.rt - }) -*/ - val scanMs2 : Seq[IScan]= Seq(source.parseScan(p.numScan, true)) + + val sc = source.parseScan(p.numScan, false) + val scanMs2: Seq[IScan] = ScanLoader.scansMs( + source, index, Some(sc.getRt - precisionRtTime), Some(sc.getRt + precisionRtTime), 2 + ) + + // val scanMs2 : Seq[IScan]= Seq(source.parseScan(p.numScan, true)) val mz = p.peaks.head.mz @@ -342,15 +342,14 @@ case object ScanLoader { p: PeakIdentification, dis: Seq[(String,Double)], /* name , mz */ precisionPeakDetection: Double = 0.3, - precisionRtTime: Double = 0.001 + precisionRtTime: Double = 0.02 ): Map[String, Option[Double]] = { -/* - val scanMs22 = scansMs(source, index,start,end, 2) - .filter(scanMs2 => { - (scanMs2.getRt - p.rt).abs < precisionRtTime - }) -*/ - val scanMs2 : Seq[IScan]= Seq(source.parseScan(p.numScan, true)) + + val sc = source.parseScan(p.numScan, false) + val scanMs2: Seq[IScan] = ScanLoader.scansMs( + source, index, Some(sc.getRt - precisionRtTime), Some(sc.getRt + precisionRtTime), 2 + ) + //val scanMs2 : Seq[IScan]= Seq(source.parseScan(p.numScan, true)) dis.map( di => { diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala index 7d0f399..edf6560 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala @@ -1,8 +1,8 @@ package fr.inrae.metabolomics.p2m2.diagnostic -import fr.inrae.metabolomics.p2m2.builder.PeakIdentification +import fr.inrae.metabolomics.p2m2.builder.{PeakIdentification, ScanLoader} import umich.ms.datatypes.scan.IScan -import umich.ms.fileio.filetypes.mzxml.MZXMLFile +import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} case object DaughterIonsDiag { @@ -10,13 +10,25 @@ case object DaughterIonsDiag { def getPeaksWithIntensitiesNoNull( source: MZXMLFile, + index: MZXMLIndex, p : PeakIdentification ) : Seq[Int] = { - val scan : IScan = source.parseScan(p.numScan, true) - (scan.getSpectrum().getMZs() zip scan.getSpectrum().getIntensities()).filter{ - case (_,y) => y>0 - } map { case (x,_) => round(x).toInt } + val sc : IScan = source.parseScan(p.numScan, false) + + val step : Double = 0.02 + val scans : Seq[IScan] = ScanLoader.scansMs( + source,index,Some(sc.getRt()-step),Some(sc.getRt()+step),2 + ) + + + scans.flatMap { + iscan => + val scan : IScan = source.parseScan(iscan.getNum, true) + (scan.getSpectrum().getMZs() zip scan.getSpectrum().getIntensities()).filter { + case (_, y) => y > 0 + } map { case (x, _) => round(x).toInt } + } } /** @@ -24,9 +36,12 @@ case object DaughterIonsDiag { * @param peaks * @return */ - def IonsFrequencyOnSelectedScanPeakDetected(source: MZXMLFile,peaks : Seq[PeakIdentification]) : Seq[(Int,Int)] = { + def IonsFrequencyOnSelectedScanPeakDetected( + source: MZXMLFile, + index: MZXMLIndex, + peaks : Seq[PeakIdentification]) : Seq[(Int,Int)] = { peaks.map( - p => DaughterIonsDiag.getPeaksWithIntensitiesNoNull(source, p) + p => DaughterIonsDiag.getPeaksWithIntensitiesNoNull(source,index, p) ).foldLeft(Map[Int, Int]())( (acc: Map[Int, Int], v: Seq[Int]) => { v.map(p2 => acc.get(p2) match { diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala index c15bd66..5660ec9 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala @@ -26,7 +26,7 @@ object DaughterIonsDiagTest extends TestSuite { */ v2.map( - p => DaughterIonsDiag.getPeaksWithIntensitiesNoNull(v._1,p) + p => DaughterIonsDiag.getPeaksWithIntensitiesNoNull(v._1, v._2,p) ).foldLeft(Map[Int,Int]())( (acc : Map[Int,Int],v : Seq[Int]) => { v.map( p2 => acc.get(p2) match { From b33b39e12a21a61fc117b0082b26f9c280d0e855 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Fri, 30 Sep 2022 15:19:21 +0200 Subject: [PATCH 03/11] fix duration test --- .../metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala index 5660ec9..592a24d 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiagTest.scala @@ -13,8 +13,8 @@ object DaughterIonsDiagTest extends TestSuite { (ScanLoader.getScanIdxAndSpectrumM0M2WithDelta( v._1, v._2, - None, // RT start - None, // RT end + Some(2.0), // RT start + Some(2.2), // RT end thresholdAbundanceM0Filter = 0.1, intensityFilter = 0, precision = 0.01, deltaMOM2 = 1.996 From 8a91d0d5988572307d7116c88d3925b533dc143e Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Wed, 5 Oct 2022 14:05:18 +0200 Subject: [PATCH 04/11] fix --- src/main/scala/Main.scala | 3 ++- .../metabolomics/p2m2/builder/Peak.scala | 2 +- .../p2m2/builder/ScanLoader.scala | 24 ++++++++++++------- .../metabolomics/p2m2/database/Chebi.scala | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/main/scala/Main.scala b/src/main/scala/Main.scala index e72570c..cca665a 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/Main.scala @@ -1,6 +1,7 @@ import fr.inrae.metabolomics.p2m2.`export`.CsvMetabolitesIdentificationFile import fr.inrae.metabolomics.p2m2.builder.{MetaboliteIdentification, PeakIdentification, ScanLoader} import fr.inrae.metabolomics.p2m2.config.ConfigReader +import fr.inrae.metabolomics.p2m2.diagnostic.DaughterIonsDiag import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} @@ -170,7 +171,7 @@ object Main extends App { case (mz, freq) => (mz.toString + " m/z -> " + freq) }.mkString(" , ")) - val listSulfurMetabolitesSelected: Seq[PeakIdentification] = //listSulfurMetabolites + val listSulfurMetabolitesSelected: Seq[PeakIdentification] = // listSulfurMetabolites ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh) val m: MetaboliteIdentification = diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala index 2245486..4a06d82 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala @@ -1,3 +1,3 @@ package fr.inrae.metabolomics.p2m2.builder -case class Peak(isotope : Int , intensity : Double, abundance : Double, mz : Double) +case class Peak(isotope : Int , intensity : Double, abundance : Double, mz : Double, rt: Double) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala index 74bf4db..78d5f6f 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala @@ -64,7 +64,9 @@ case object ScanLoader { isotopeNum, spectrum.getIntensities()(idx), spectrum.getIntensities()(idx) / scan.getBasePeakIntensity, - spectrum.getMZs()(idx))) + spectrum.getMZs()(idx), + scan.getRt + )) case None => None }}, scan.getRt @@ -189,13 +191,14 @@ case object ScanLoader { peaks.map { p => val mz = (p.peaks.head.mz * precisionMzh).round / precisionMzh.toDouble - (mz, p) - }.foldLeft(Map[Double, Seq[PeakIdentification]]()) { - case (acc, (mz, p)) if acc.contains(mz) => acc + (mz -> (acc(mz) ++ Seq(p))) - case (acc, (mz, p)) => acc + (mz -> Seq(p)) + val rt = (p.peaks.head.rt * 3).round / 3.toDouble // windows 0.3 sec + (mz, rt, p) + }.foldLeft(Map[(Double,Double), Seq[PeakIdentification]]()) { + case (acc, (mz, rt, p)) if acc.contains( (mz,rt) ) => acc + ( (mz,rt) -> (acc( (mz,rt) ) ++ Seq(p))) + case (acc, (mz, rt, p)) => acc + ( (mz,rt) -> Seq(p)) }.map { - case (_, listPeaks) => listPeaks.maxBy(_.peaks.head.abundance) + case ( (_,_), listPeaks) => listPeaks.maxBy(_.peaks.head.abundance) }.toSeq } @@ -288,10 +291,13 @@ case object ScanLoader { val scan2 = source.parseScan(scanMs2.getNum, true) scan2.getSpectrum match { case spectrum if (spectrum != null) => val v = (spectrum.findClosestMzIdx(mzSearch)) - if ((mzSearch - spectrum.getMZs()(v)).abs < precisionPeakDetection) + if ((mzSearch - spectrum.getMZs()(v)).abs < precisionPeakDetection) { // Some(spectrum.getIntensities()(v)) - Some(spectrum.getMZs()(v)) - else None + if (spectrum.getIntensities()(v)>0) + Some(spectrum.getMZs()(v)) + else + None + } else None case _ => None } }.sorted.lastOption // take the biggest value diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/database/Chebi.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/database/Chebi.scala index 63d3185..43a2665 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/database/Chebi.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/database/Chebi.scala @@ -15,7 +15,7 @@ import scala.util.{Failure, Success, Try} case object Chebi { private val r = getClass.getResource("/glucosinolate_ChEBI.tsv") - private val massProton : Double = 0.007276 + private val massProton : Double = 1.007276 private val entries : Seq[Map[String,String]] = Try(r.getPath) match { case Success(_) => From a091bc32b09b83bd28215b577cce03f78df4d998 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Wed, 5 Oct 2022 14:48:07 +0200 Subject: [PATCH 05/11] add test --- .../builder/MetaboliteIdentification.scala | 13 +++--- .../MetaboliteIdentificationTest.scala | 39 +++++++++++++++++ .../p2m2/builder/ScanLoaderTest.scala | 15 +++++++ ...CsvMetabolitesIdentificationFileTest.scala | 42 +++++++++++++++++++ 4 files changed, 104 insertions(+), 5 deletions(-) create mode 100644 src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala create mode 100644 src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala index d42ae7b..1110a26 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala @@ -12,32 +12,35 @@ case class MetaboliteIdentification( nls : Seq[(String,Double)], dis : Seq[(String,Double)] ) { - def getInfo( p :PeakIdentification,precisionMzh : Int) : CsvMetabolitesIdentification = { + def getInfo( p :PeakIdentification,precisionMzh : Int) : Option[CsvMetabolitesIdentification] = p.peaks.nonEmpty match { + case true => val mz = p.peaks.map(p2 => (p2.mz*precisionMzh ).round / precisionMzh.toDouble ) val intensities = p.peaks.map(_.intensity) val abundance = p.peaks.map(_.abundance) - CsvMetabolitesIdentification( + Some(CsvMetabolitesIdentification( mz, intensities, abundance, p.rt, neutralLosses = ScanLoader.detectNeutralLoss(source,index,start,end,p,nls), daughterIons = ScanLoader.detectDaughterIons(source,index,start,end,p,dis) - ) + )) + case false => None } def getInfos(precisionMzh : Int): Seq[CsvMetabolitesIdentification] = { println("\n== detectNeutralLoss/detectDaughterIons == ") peaks.zipWithIndex - . map { + . flatMap { case (x,idx) => print(s"\r===>$idx/${peaks.size}") getInfo(x,precisionMzh) } /* remove entry if none neutral and none daughters ions detected or big abundance (>60%)*/ - .filter( csvM => (csvM.neutralLosses.values.flatten.nonEmpty && csvM.daughterIons.values.flatten.nonEmpty) ) + /* remove constraint with at least one DL and one NL */ + // .filter( csvM => (csvM.neutralLosses.values.flatten.nonEmpty && csvM.daughterIons.values.flatten.nonEmpty) ) .sortBy( x => (x.rt,x.mz.head) ) } } diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala new file mode 100644 index 0000000..9318747 --- /dev/null +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala @@ -0,0 +1,39 @@ +package fr.inrae.metabolomics.p2m2.builder + +import fr.inrae.metabolomics.p2m2.builder.ScanLoaderTest.getClass +import utest.{TestSuite, Tests, test} + +import java.io.File + +object MetaboliteIdentificationTest extends TestSuite { + val v = ScanLoader.read(new File(getClass.getResource("/20181018-037.mzXML").getPath)) + + val tests: Tests = Tests { + test("instance") { + MetaboliteIdentification(v._1,v._2,None,None,Seq(),Seq(),Seq()) + } + + test("getInfo") { + MetaboliteIdentification(v._1,v._2,None,None,Seq(),Seq(),Seq()).getInfo(PeakIdentification(0,Seq(1501),Seq(),0.0),1) + } + + + + test("getInfo 2") { + MetaboliteIdentification(v._1, v._2, None, None, Seq(), Seq(), Seq()).getInfo( + PeakIdentification(3569, Seq(1501), + Seq(Peak(0,0.0,0,0,0)), 0.0), 1) + } + + test("getInfos") { + MetaboliteIdentification(v._1, v._2, None, None, Seq(), Seq(), Seq()).getInfos(1) + } + + test("getInfos 2") { + MetaboliteIdentification(v._1, v._2, None, None, + Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0, 0)), 0.0)), + Seq(), Seq()).getInfos(1) + } + + } +} diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala index b22ef5b..682e505 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala @@ -68,6 +68,21 @@ object ScanLoaderTest extends TestSuite { p.foreach(x => println(ScanLoader.detectNeutralLoss(v._1, v._2,x, 80.0)))*/ } + test("calculBackgroundNoisePeak") { + val v = read + ScanLoader.calculBackgroundNoisePeak( + v._1, + v._2, + Some(1.0), // RT start + Some(1.3), // RT end + ) + } + + test("keepSimilarMzWithMaxAbundance") { + ScanLoader.keepSimilarMzWithMaxAbundance( + Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0, 0)), 0.0)),1 + ) + } } diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala new file mode 100644 index 0000000..e58c4c4 --- /dev/null +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala @@ -0,0 +1,42 @@ +package fr.inrae.metabolomics.p2m2.`export` + +import fr.inrae.metabolomics.p2m2.builder.ScanLoader +import fr.inrae.metabolomics.p2m2.config.ConfigReader +import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification + +import utest.{TestSuite, Tests, test} + +import java.io.File +import scala.io.Source + +object CsvMetabolitesIdentificationFileTest extends TestSuite { + val v = ScanLoader.read(new File(getClass.getResource("/20181018-037.mzXML").getPath)) + + val tests: Tests = Tests { + test("instance") { + val confJson = ConfigReader.read( + Source.fromInputStream( + getClass.getResource("/default.json") + .openStream()).getLines().mkString) + + val f = File.createTempFile("test",".csv") + + CsvMetabolitesIdentificationFile.build( + Seq( + CsvMetabolitesIdentification( + mz = Seq(1.0, 1.0), + intensity = Seq(1.0, 1.0), + abundance = Seq(1.0, 1.0), + rt = 0.1, + neutralLosses = Map(), + daughterIons = Map() + ) + ), + familyMetabolite = "Glucosinolate", + configJson = confJson, + out = f + ) + } + } + + } From faef0cdfcd8744ec1552eb0f2b0168ac0c7e74dd Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Wed, 5 Oct 2022 15:54:25 +0200 Subject: [PATCH 06/11] - add column Hyp. identitifcation peptide - delete filter one DI one NL - add column "count of NL+DI" close #4 close #5 --- .../CsvMetabolitesIdentificationFile.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala index d871e51..94ee3e9 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala @@ -7,6 +7,9 @@ import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification import java.io.{BufferedWriter, File, FileWriter} case object CsvMetabolitesIdentificationFile { + + val mzCoreStructureGlucosinolate : Double = 317.995896 + def build(list : Seq[CsvMetabolitesIdentification], familyMetabolite : String, configJson : ConfigReader, out: File) : Unit = { @@ -23,8 +26,12 @@ case object CsvMetabolitesIdentificationFile { bw.write(s"CHEBI ID;") bw.write(s"BRASSICA ID;") bw.write("RT;") + bw.write("Hyp. Identification Sulfur Polypeptide;") + bw.write("Nb (NL+DI);") + neutralLosses.foreach { name => bw.write(s"NL_$name;")} daughterIons.foreach { name => bw.write(s"DI_$name;")} + bw.write("\n") list.foreach( @@ -37,8 +44,15 @@ case object CsvMetabolitesIdentificationFile { bw.write(Chebi.getEntries(metabolitesIdentificationId.mz.head).map( entry => entry("ID")).mkString(",")+";") bw.write(configJson.getEntriesBaseRef(familyMetabolite,metabolitesIdentificationId.mz.head).mkString(",")+";") - bw.write(s"${metabolitesIdentificationId.rt};") + + if ( metabolitesIdentificationId.mz.head < mzCoreStructureGlucosinolate ) { + bw.write("*;") + } else + bw.write(";") + val c : Int = (metabolitesIdentificationId.neutralLosses.values.flatten.size)+(metabolitesIdentificationId.daughterIons.values.flatten.size) + bw.write(s"$c;") + neutralLosses.foreach { name => bw.write(s"${metabolitesIdentificationId.neutralLosses(name).getOrElse("")};")} daughterIons.foreach { name => bw.write(s"${metabolitesIdentificationId.daughterIons(name).getOrElse("")};")} bw.write("\n") From fc0a51913201c69bde30781941d0ce742903eaf7 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Wed, 5 Oct 2022 16:25:35 +0200 Subject: [PATCH 07/11] add coverage --- build.sbt | 1 + .../inrae/metabolomics/p2m2}/Main.scala | 137 +++++++++--------- .../fr/inrae/metabolomics/p2m2/MainTest.scala | 52 +++++++ 3 files changed, 122 insertions(+), 68 deletions(-) rename src/main/scala/{ => fr/inrae/metabolomics/p2m2}/Main.scala (57%) create mode 100644 src/test/scala/fr/inrae/metabolomics/p2m2/MainTest.scala diff --git a/build.sbt b/build.sbt index 6630063..05a6d0f 100644 --- a/build.sbt +++ b/build.sbt @@ -23,6 +23,7 @@ libraryDependencies ++= Seq( "com.lihaoyi" %% "ujson" % "2.0.0", "org.scala-lang.modules" %% "scala-parser-combinators" % "2.1.1", "com.lihaoyi" %% "utest" % "0.8.1" % Test, + "org.slf4j" % "slf4j-simple" % "2.0.3" % Test, ) credentials += { diff --git a/src/main/scala/Main.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala similarity index 57% rename from src/main/scala/Main.scala rename to src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala index cca665a..53ebd4a 100644 --- a/src/main/scala/Main.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala @@ -1,7 +1,8 @@ +package fr.inrae.metabolomics.p2m2 + import fr.inrae.metabolomics.p2m2.`export`.CsvMetabolitesIdentificationFile import fr.inrae.metabolomics.p2m2.builder.{MetaboliteIdentification, PeakIdentification, ScanLoader} import fr.inrae.metabolomics.p2m2.config.ConfigReader -import fr.inrae.metabolomics.p2m2.diagnostic.DaughterIonsDiag import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} @@ -13,16 +14,16 @@ object Main extends App { import scopt.OParser case class Config( - mzfiles : Seq[File] = Seq(), - jsonFamilyMetabolitesDetection : Option[File] = None, - thresholdIntensityFilter : Option[Int] = None, - thresholdAbundanceM0Filter : Double = 0.1, - overrepresentedPeakFilter : Int = 800, - startRT : Option[Double] = None, - endRT : Option[Double] = None, - precisionMzh : Int = 1000, - toleranceMz : Double = 0.01, - outfile : Option[File] = None, + mzfiles: Seq[File] = Seq(), + jsonFamilyMetabolitesDetection: Option[File] = None, + thresholdIntensityFilter: Option[Int] = None, + thresholdAbundanceM0Filter: Double = 0.1, + overrepresentedPeakFilter: Int = 800, + startRT: Option[Double] = None, + endRT: Option[Double] = None, + precisionMzh: Int = 1000, + toleranceMz: Double = 0.01, + outfile: Option[File] = None, verbose: Boolean = false, debug: Boolean = false ) @@ -37,11 +38,11 @@ object Main extends App { .optional() .action((x, c) => c.copy(jsonFamilyMetabolitesDetection = Some(x))) .text(s"json configuration to detect metabolite family."), - opt[Int]('i',"thresholdIntensityFilter") + opt[Int]('i', "thresholdIntensityFilter") .optional() .action((x, c) => c.copy(thresholdIntensityFilter = Some(x))) .text(s"Keep ions above a x intensity (calculation on start-up time)"), - opt[Int]('p',"overrepresentedPeakFilter") + opt[Int]('p', "overrepresentedPeakFilter") .optional() .action((x, c) => c.copy(overrepresentedPeakFilter = x)) .text(s"filter about over represented peaks. default ${Config().overrepresentedPeakFilter}"), @@ -57,11 +58,11 @@ object Main extends App { .optional() .action((x, c) => c.copy(precisionMzh = x)) .text(s"precision/rounded Mzh (number to the right of the decimal point) . ${Config().precisionMzh}"), - opt[Double]('t',"toleranceMz") + opt[Double]('t', "toleranceMz") .optional() .action((x, c) => c.copy(toleranceMz = x)) .text(s"tolerance accepted. ${Config().toleranceMz}"), - opt[File]('o',"outputFile") + opt[File]('o', "outputFile") .optional() .action((x, c) => c.copy(outfile = Some(x))) .text(s"output path file."), @@ -92,12 +93,12 @@ object Main extends App { // arguments are bad, error message will have been displayed } - def process(config : Config): Unit = { + def process(config: Config): Unit = { val confJson = config.jsonFamilyMetabolitesDetection match { case Some(jsonFilePath) => - val s = Source.fromFile (jsonFilePath) - val res = ConfigReader.read(s.getLines ().mkString) + val s = Source.fromFile(jsonFilePath) + val res = ConfigReader.read(s.getLines().mkString) s.close() res case None => @@ -129,63 +130,63 @@ object Main extends App { confJson.daughterIons(family) ) } - val f = config.outfile.getOrElse(new File( s"$family.csv")) + val f = config.outfile.getOrElse(new File(s"$family.csv")) f.delete() - CsvMetabolitesIdentificationFile.build(values,family,confJson,f) + CsvMetabolitesIdentificationFile.build(values, family, confJson, f) println(s"========= check ${f.getPath} ===============") }) - } + } - def analyse_metabolite( - config: Config, - source: MZXMLFile, - index: MZXMLIndex, - intensityFilter: Int, - deltaMp0Mp2: Double, - numberSulfurMin: Double, - neutralLoss: Map[String, Double], - daughterIons: Map[String, Double] - ): Seq[CsvMetabolitesIdentification] = { - - val listSulfurMetabolites: Seq[PeakIdentification] = - ScanLoader. - getScanIdxAndSpectrumM0M2WithDelta( - source, - index, - config.startRT, - config.endRT, - config.thresholdAbundanceM0Filter, - intensityFilter, - filteringOnNbSulfur = numberSulfurMin.toInt, - config.toleranceMz, - deltaMOM2 = deltaMp0Mp2) - - /* Diagnostics : Ions frequency on selected Scan peak detected ! */ - - val frequencyOfMz : Seq[(Int,Int)] = Seq() // DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,index,listSulfurMetabolites) - println(frequencyOfMz) - /* Attention c est lent..... peut etre a faire en option !!*/ - println("\n\n\n============== Twenty Ions frequency on selected Scan peak detected =========================") - println(frequencyOfMz.reverse.slice(1,20).map { - case (mz, freq) => (mz.toString + " m/z -> " + freq) - }.mkString(" , ")) - - val listSulfurMetabolitesSelected: Seq[PeakIdentification] = // listSulfurMetabolites - ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh) - - val m: MetaboliteIdentification = - ScanLoader.filterOverRepresentedPeak( + def analyse_metabolite( + config: Config, + source: MZXMLFile, + index: MZXMLIndex, + intensityFilter: Int, + deltaMp0Mp2: Double, + numberSulfurMin: Double, + neutralLoss: Map[String, Double], + daughterIons: Map[String, Double] + ): Seq[CsvMetabolitesIdentification] = { + + val listSulfurMetabolites: Seq[PeakIdentification] = + ScanLoader. + getScanIdxAndSpectrumM0M2WithDelta( source, index, config.startRT, config.endRT, - listSulfurMetabolitesSelected, + config.thresholdAbundanceM0Filter, intensityFilter, - config.overrepresentedPeakFilter, - neutralLoss.toSeq, - daughterIons.toSeq - ) - m.getInfos(config.precisionMzh) - } -} \ No newline at end of file + filteringOnNbSulfur = numberSulfurMin.toInt, + config.toleranceMz, + deltaMOM2 = deltaMp0Mp2) + + /* Diagnostics : Ions frequency on selected Scan peak detected ! */ + + val frequencyOfMz: Seq[(Int, Int)] = Seq() // DaughterIonsDiag.IonsFrequencyOnSelectedScanPeakDetected(source,index,listSulfurMetabolites) + println(frequencyOfMz) + /* Attention c est lent..... peut etre a faire en option !!*/ + println("\n\n\n============== Twenty Ions frequency on selected Scan peak detected =========================") + println(frequencyOfMz.reverse.slice(1, 20).map { + case (mz, freq) => (mz.toString + " m/z -> " + freq) + }.mkString(" , ")) + + val listSulfurMetabolitesSelected: Seq[PeakIdentification] = // listSulfurMetabolites + ScanLoader.keepSimilarMzWithMaxAbundance(listSulfurMetabolites, config.precisionMzh) + + val m: MetaboliteIdentification = + ScanLoader.filterOverRepresentedPeak( + source, + index, + config.startRT, + config.endRT, + listSulfurMetabolitesSelected, + intensityFilter, + config.overrepresentedPeakFilter, + neutralLoss.toSeq, + daughterIons.toSeq + ) + m.getInfos(config.precisionMzh) + } +} diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/MainTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/MainTest.scala new file mode 100644 index 0000000..dbb6b99 --- /dev/null +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/MainTest.scala @@ -0,0 +1,52 @@ +package fr.inrae.metabolomics.p2m2 + +import fr.inrae.metabolomics.p2m2.Main.getClass +import utest.{TestSuite, Tests, test} + +import java.io.File +import scala.util.{Failure, Success, Try} + +object MainTest extends TestSuite { + + val tests: Tests = Tests { + + test("Main - help") { + /* + Try(fr.inrae.metabolomics.p2m2.Main.main(Array("--help"))) match { + case Success(_) => //assert(true) + case Failure(e) => //println(e); assert(false) + }*/ + } + + + test("Main") { + Try(fr.inrae.metabolomics.p2m2.Main.main(Array( + "-s","1.0", + "-e","1.5", + "-i","1000", + "-p","500", + "-m","100", + "-t","0.01", + "-o","test.csv", + getClass.getResource("/20181018-037.mzXML").getPath))) match { + case Success(_) => new File("test.csv").delete(); assert(true) + case Failure(e) => println(e); assert(false) + } + + } + + test("Main 2") { + Try(fr.inrae.metabolomics.p2m2.Main.main(Array( + "-j",getClass.getResource("/default.json").getPath, + "-s", "1.0", + "-e", "1.5", + "-o", "test.csv", + getClass.getResource("/20181018-037.mzXML").getPath))) match { + case Success(_) => new File("test.csv").delete(); assert(true) + case Failure(e) => println(e); assert(false) + } + } + + } + +} From 092e7eeef0854e9943c7cedd5ba5027784ca6b8d Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Thu, 6 Oct 2022 15:42:52 +0200 Subject: [PATCH 08/11] add Diagnostics Fragment --- src/main/resources/default.json | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/resources/default.json b/src/main/resources/default.json index 9f41631..ced219e 100644 --- a/src/main/resources/default.json +++ b/src/main/resources/default.json @@ -4,18 +4,29 @@ "numberSulfurMin" : 2, "neutralLoss" : { - "gluconolactone" : 178.0, - "sulfureTrioxide" : 80.0, - "anhydroglucose" : 162.0, "thioglucose_s03" : 242.0, + "glucosinolate_223" : 223.0, "thioglucose" : 196.0, - "glucosinolate_223" : 223.0 + "gluconolactone" : 178.0, + "RCNO4S2-" : 163.0, + "anhydroglucose" : 162.0, + "sulfureTrioxide" : 80.0 }, "daughterIons" : { - "C6H11O9S_259" : 259.0, "C6H11O8S2_275" : 275.0, + "C6H11O9S_259" : 259.0, + "C6H10O8S-_241" : 242.0, "C6H9NO8S_241" : 241.0, - "C6H11O7S_227" : 227.0 + "C6H11O7S-_227" : 227.0, + "C6H11O5S-_195" : 195.03, + "C6H11O2-_153" : 163.0, + "C2H4O5NS-_153" : 153.98, + "C2H3O5S-_138" : 138.97, + "C2H2O4S-_135" : 135.97, + "HO4S2-_128" : 128.93, + "HSO4-_97" : 96.95, + "SO4-_95" : 96.0, + "C2H3OS-" : 74.99 }, "databaseReference" : { "G01" : 406.029, From 93b1b5af04271f079b1dfbb2d36ddcc1e43fd50c Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Thu, 6 Oct 2022 15:49:41 +0200 Subject: [PATCH 09/11] remove redundance rt persitence --- src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala | 2 +- .../fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala | 5 ++--- .../fr/inrae/metabolomics/p2m2/config/ConfigReader.scala | 1 - .../metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala | 4 ++-- .../p2m2/builder/MetaboliteIdentificationTest.scala | 4 ++-- .../fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala | 2 +- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala index 4a06d82..2245486 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/Peak.scala @@ -1,3 +1,3 @@ package fr.inrae.metabolomics.p2m2.builder -case class Peak(isotope : Int , intensity : Double, abundance : Double, mz : Double, rt: Double) +case class Peak(isotope : Int , intensity : Double, abundance : Double, mz : Double) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala index 78d5f6f..f1168b2 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala @@ -64,8 +64,7 @@ case object ScanLoader { isotopeNum, spectrum.getIntensities()(idx), spectrum.getIntensities()(idx) / scan.getBasePeakIntensity, - spectrum.getMZs()(idx), - scan.getRt + spectrum.getMZs()(idx) )) case None => None }}, @@ -191,7 +190,7 @@ case object ScanLoader { peaks.map { p => val mz = (p.peaks.head.mz * precisionMzh).round / precisionMzh.toDouble - val rt = (p.peaks.head.rt * 3).round / 3.toDouble // windows 0.3 sec + val rt = (p.rt * 3).round / 3.toDouble // windows 0.3 sec (mz, rt, p) }.foldLeft(Map[(Double,Double), Seq[PeakIdentification]]()) { case (acc, (mz, rt, p)) if acc.contains( (mz,rt) ) => acc + ( (mz,rt) -> (acc( (mz,rt) ) ++ Seq(p))) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala index bc274e2..9c22f76 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala @@ -15,7 +15,6 @@ case object ConfigReader { } def read(config : String) : ConfigReader = { - //Source.fromFile("./glucosinolate.json").getLines().mkString val u = ujson.read(config) val metabolites: Map[String, Map[Params.Value, String]] = u.obj.map( diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala index edf6560..81c48d4 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/diagnostic/DaughterIonsDiag.scala @@ -18,14 +18,14 @@ case object DaughterIonsDiag { val step : Double = 0.02 val scans : Seq[IScan] = ScanLoader.scansMs( - source,index,Some(sc.getRt()-step),Some(sc.getRt()+step),2 + source,index,Some(sc.getRt-step),Some(sc.getRt+step),2 ) scans.flatMap { iscan => val scan : IScan = source.parseScan(iscan.getNum, true) - (scan.getSpectrum().getMZs() zip scan.getSpectrum().getIntensities()).filter { + (scan.getSpectrum.getMZs zip scan.getSpectrum.getIntensities()).filter { case (_, y) => y > 0 } map { case (x, _) => round(x).toInt } } diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala index 9318747..169dec6 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala @@ -22,7 +22,7 @@ object MetaboliteIdentificationTest extends TestSuite { test("getInfo 2") { MetaboliteIdentification(v._1, v._2, None, None, Seq(), Seq(), Seq()).getInfo( PeakIdentification(3569, Seq(1501), - Seq(Peak(0,0.0,0,0,0)), 0.0), 1) + Seq(Peak(0,0.0,0,0)), 0.0), 1) } test("getInfos") { @@ -31,7 +31,7 @@ object MetaboliteIdentificationTest extends TestSuite { test("getInfos 2") { MetaboliteIdentification(v._1, v._2, None, None, - Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0, 0)), 0.0)), + Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0)), 0.0)), Seq(), Seq()).getInfos(1) } diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala index 682e505..020d0f9 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoaderTest.scala @@ -80,7 +80,7 @@ object ScanLoaderTest extends TestSuite { test("keepSimilarMzWithMaxAbundance") { ScanLoader.keepSimilarMzWithMaxAbundance( - Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0, 0)), 0.0)),1 + Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0)), 0.0)),1 ) } From eb805c10eb3cd27c86af9cf74eb4adf6dca6a423 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Thu, 6 Oct 2022 17:04:57 +0200 Subject: [PATCH 10/11] unactive serach ions according minimum mz --- src/main/resources/default.json | 4 +- .../fr/inrae/metabolomics/p2m2/Main.scala | 4 +- .../builder/MetaboliteIdentification.scala | 47 ++++++++++++------- .../p2m2/builder/ScanLoader.scala | 5 +- .../p2m2/config/ConfigReader.scala | 7 +-- .../CsvMetabolitesIdentificationFile.scala | 17 ++++--- .../MetaboliteIdentificationTest.scala | 9 ++-- .../p2m2/config/ConfigReaderTest.scala | 1 + 8 files changed, 57 insertions(+), 37 deletions(-) diff --git a/src/main/resources/default.json b/src/main/resources/default.json index ced219e..01a0653 100644 --- a/src/main/resources/default.json +++ b/src/main/resources/default.json @@ -2,8 +2,8 @@ "Glucosinolate" : { "deltaMp0Mp2" : 1.996, "numberSulfurMin" : 2, - "neutralLoss" : - { + "minMzCoreStructure" : 317.995896, + "neutralLoss" : { "thioglucose_s03" : 242.0, "glucosinolate_223" : 223.0, "thioglucose" : 196.0, diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala index 53ebd4a..ca9c1fe 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala @@ -126,6 +126,7 @@ object Main extends App { intensityFilter, confJson.deltaMp0Mp2(family), confJson.numberSulfurMin(family), + confJson.minMzCoreStructure(family), confJson.neutralLoss(family), confJson.daughterIons(family) ) @@ -145,6 +146,7 @@ object Main extends App { intensityFilter: Int, deltaMp0Mp2: Double, numberSulfurMin: Double, + mzCoreStructure : Double, neutralLoss: Map[String, Double], daughterIons: Map[String, Double] ): Seq[CsvMetabolitesIdentification] = { @@ -187,6 +189,6 @@ object Main extends App { neutralLoss.toSeq, daughterIons.toSeq ) - m.getInfos(config.precisionMzh) + m.findDiagnosticIonsAndNeutralLosses(config.precisionMzh,mzCoreStructure) } } diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala index 1110a26..b35ec63 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala @@ -12,35 +12,48 @@ case class MetaboliteIdentification( nls : Seq[(String,Double)], dis : Seq[(String,Double)] ) { - def getInfo( p :PeakIdentification,precisionMzh : Int) : Option[CsvMetabolitesIdentification] = p.peaks.nonEmpty match { + def getInfo( p :PeakIdentification,precisionMzh : Int, mzCoreStructure : Double) : Option[CsvMetabolitesIdentification] = p.peaks.nonEmpty match { case true => - val mz = p.peaks.map(p2 => (p2.mz*precisionMzh ).round / precisionMzh.toDouble ) - val intensities = p.peaks.map(_.intensity) - val abundance = p.peaks.map(_.abundance) + val mz = p.peaks.map(p2 => (p2.mz*precisionMzh ).round / precisionMzh.toDouble ) + val intensities = p.peaks.map(_.intensity) + val abundance = p.peaks.map(_.abundance) - Some(CsvMetabolitesIdentification( - mz, - intensities, - abundance, - p.rt, - neutralLosses = ScanLoader.detectNeutralLoss(source,index,start,end,p,nls), - daughterIons = ScanLoader.detectDaughterIons(source,index,start,end,p,dis) - )) + if ( p.peaks.head.mz >= mzCoreStructure ) + Some(CsvMetabolitesIdentification( + mz, + intensities, + abundance, + p.rt, + neutralLosses = ScanLoader.detectNeutralLoss(source,index,start,end,p,nls), + daughterIons = ScanLoader.detectDaughterIons(source,index,start,end,p,dis) + )) + else + Some(CsvMetabolitesIdentification( + mz, + intensities, + abundance, + p.rt, + neutralLosses = Map(), + daughterIons = Map() + )) case false => None } - def getInfos(precisionMzh : Int): Seq[CsvMetabolitesIdentification] = { + /** + * + * @param precisionMzh precision of mzh + * @param mzCoreStructure minimum size of a metabolite according param family + * @return + */ + def findDiagnosticIonsAndNeutralLosses(precisionMzh : Int, mzCoreStructure : Double): Seq[CsvMetabolitesIdentification] = { println("\n== detectNeutralLoss/detectDaughterIons == ") peaks.zipWithIndex . flatMap { case (x,idx) => print(s"\r===>$idx/${peaks.size}") - getInfo(x,precisionMzh) + getInfo(x,precisionMzh,mzCoreStructure) } - /* remove entry if none neutral and none daughters ions detected or big abundance (>60%)*/ - /* remove constraint with at least one DL and one NL */ - // .filter( csvM => (csvM.neutralLosses.values.flatten.nonEmpty && csvM.daughterIons.values.flatten.nonEmpty) ) .sortBy( x => (x.rt,x.mz.head) ) } } diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala index f1168b2..4d16d55 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/ScanLoader.scala @@ -182,7 +182,7 @@ case object ScanLoader { } /** - * Merge all M/z and keep the Ions with the maximum abundance + * Merge all M/z in a short Windows RT and keep the Ions with the maximum abundance * @param peaks * @return */ @@ -190,7 +190,7 @@ case object ScanLoader { peaks.map { p => val mz = (p.peaks.head.mz * precisionMzh).round / precisionMzh.toDouble - val rt = (p.rt * 3).round / 3.toDouble // windows 0.3 sec + val rt = (p.rt * 3).round / 3.toDouble // windows 0.6 sec ... to check (mz, rt, p) }.foldLeft(Map[(Double,Double), Seq[PeakIdentification]]()) { case (acc, (mz, rt, p)) if acc.contains( (mz,rt) ) => acc + ( (mz,rt) -> (acc( (mz,rt) ) ++ Seq(p))) @@ -291,7 +291,6 @@ case object ScanLoader { scan2.getSpectrum match { case spectrum if (spectrum != null) => val v = (spectrum.findClosestMzIdx(mzSearch)) if ((mzSearch - spectrum.getMZs()(v)).abs < precisionPeakDetection) { - // Some(spectrum.getIntensities()(v)) if (spectrum.getIntensities()(v)>0) Some(spectrum.getMZs()(v)) else diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala index 9c22f76..598b32c 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/config/ConfigReader.scala @@ -9,8 +9,7 @@ case object ConfigReader { val deltaMp0Mp2, numberSulfurMin, - neutralLoss, - daughterIons + minMzCoreStructure = Value } @@ -20,7 +19,8 @@ case object ConfigReader { val metabolites: Map[String, Map[Params.Value, String]] = u.obj.map( k => k._1 -> Map( Params.deltaMp0Mp2 -> k._2("deltaMp0Mp2").value.toString, - Params.numberSulfurMin -> k._2("numberSulfurMin").value.toString + Params.numberSulfurMin -> k._2("numberSulfurMin").value.toString, + Params.minMzCoreStructure -> k._2("minMzCoreStructure").value.toString ) ).toMap @@ -51,6 +51,7 @@ case class ConfigReader( def metabolites : Seq[String] = metabolitesMap.keys.toSeq def deltaMp0Mp2(m: String) : Double = metabolitesMap(m)(Params.deltaMp0Mp2).toString.toDouble def numberSulfurMin(m: String) : Double = metabolitesMap(m)(Params.numberSulfurMin).toString.toDouble + def minMzCoreStructure(m: String) : Double = metabolitesMap(m)(Params.minMzCoreStructure).toString.toDouble def neutralLoss(m: String) : Map[String,Double] = nl(m) def daughterIons(m: String) : Map[String,Double] = di(m) def getEntriesBaseRef(m: String,monoIsotopicMassSearch: Double, tolerance: Double = 0.01): Seq[String] = { diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala index 94ee3e9..ce485bd 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala @@ -8,15 +8,13 @@ import java.io.{BufferedWriter, File, FileWriter} case object CsvMetabolitesIdentificationFile { - val mzCoreStructureGlucosinolate : Double = 317.995896 - def build(list : Seq[CsvMetabolitesIdentification], familyMetabolite : String, configJson : ConfigReader, out: File) : Unit = { if ( list.nonEmpty ) { val size = list.last.mz.length - val neutralLosses = list.last.neutralLosses.keys - val daughterIons = list.last.daughterIons.keys + val neutralLosses = configJson.neutralLoss(familyMetabolite).keys + val daughterIons = configJson.daughterIons(familyMetabolite).keys val bw = new BufferedWriter(new FileWriter(out)) @@ -46,15 +44,20 @@ case object CsvMetabolitesIdentificationFile { bw.write(configJson.getEntriesBaseRef(familyMetabolite,metabolitesIdentificationId.mz.head).mkString(",")+";") bw.write(s"${metabolitesIdentificationId.rt};") - if ( metabolitesIdentificationId.mz.head < mzCoreStructureGlucosinolate ) { + if ( metabolitesIdentificationId.mz.head < configJson.minMzCoreStructure(familyMetabolite) ) { bw.write("*;") } else bw.write(";") + val c : Int = (metabolitesIdentificationId.neutralLosses.values.flatten.size)+(metabolitesIdentificationId.daughterIons.values.flatten.size) bw.write(s"$c;") - neutralLosses.foreach { name => bw.write(s"${metabolitesIdentificationId.neutralLosses(name).getOrElse("")};")} - daughterIons.foreach { name => bw.write(s"${metabolitesIdentificationId.daughterIons(name).getOrElse("")};")} + neutralLosses + .map( name => metabolitesIdentificationId.neutralLosses.getOrElse(name,None).getOrElse("")) + .foreach { value => bw.write(s"$value;")} + daughterIons + .map( name => metabolitesIdentificationId.daughterIons.getOrElse(name,None).getOrElse("")) + .foreach { value => bw.write(s"$value;")} bw.write("\n") } ) diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala index 169dec6..08bc53c 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentificationTest.scala @@ -14,7 +14,7 @@ object MetaboliteIdentificationTest extends TestSuite { } test("getInfo") { - MetaboliteIdentification(v._1,v._2,None,None,Seq(),Seq(),Seq()).getInfo(PeakIdentification(0,Seq(1501),Seq(),0.0),1) + MetaboliteIdentification(v._1,v._2,None,None,Seq(),Seq(),Seq()).getInfo(PeakIdentification(0,Seq(1501),Seq(),0.0),1,0.0) } @@ -22,17 +22,18 @@ object MetaboliteIdentificationTest extends TestSuite { test("getInfo 2") { MetaboliteIdentification(v._1, v._2, None, None, Seq(), Seq(), Seq()).getInfo( PeakIdentification(3569, Seq(1501), - Seq(Peak(0,0.0,0,0)), 0.0), 1) + Seq(Peak(0,0.0,0,0)), 0.0), 1,0.0) } test("getInfos") { - MetaboliteIdentification(v._1, v._2, None, None, Seq(), Seq(), Seq()).getInfos(1) + MetaboliteIdentification(v._1, v._2, None, None, Seq(), Seq(), Seq()).findDiagnosticIonsAndNeutralLosses(1,0.0) } test("getInfos 2") { MetaboliteIdentification(v._1, v._2, None, None, Seq(PeakIdentification(3569, Seq(1501), Seq(Peak(0, 0.0, 0, 0)), 0.0)), - Seq(), Seq()).getInfos(1) + Seq(), Seq()) + .findDiagnosticIonsAndNeutralLosses(1,0.0) } } diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/config/ConfigReaderTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/config/ConfigReaderTest.scala index 73412ce..2f310a0 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/config/ConfigReaderTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/config/ConfigReaderTest.scala @@ -10,6 +10,7 @@ object ConfigReaderTest extends TestSuite { | "Glucosinolate" : { | "deltaMp0Mp2" : 1.996, | "numberSulfurMin" : 2, + | "minMzCoreStructure" : 0, | "neutralLoss" : | { | "gluconolactone" : 178.0 From f0e55407f858c9ee2035996ad59209090ac7b460 Mon Sep 17 00:00:00 2001 From: Olivier Filangi Date: Thu, 6 Oct 2022 17:10:58 +0200 Subject: [PATCH 11/11] refactor traitment --- src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala | 4 ++-- .../p2m2/builder/MetaboliteIdentification.scala | 10 +++++----- .../p2m2/export/CsvMetabolitesIdentificationFile.scala | 7 +++---- ...ification.scala => MetabolitesIdentification.scala} | 6 ++++-- ....scala => MetabolitesIdentificationFileTest$.scala} | 6 +++--- 5 files changed, 17 insertions(+), 16 deletions(-) rename src/main/scala/fr/inrae/metabolomics/p2m2/output/{CsvMetabolitesIdentification.scala => MetabolitesIdentification.scala} (70%) rename src/test/scala/fr/inrae/metabolomics/p2m2/export/{CsvMetabolitesIdentificationFileTest.scala => MetabolitesIdentificationFileTest$.scala} (85%) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala index ca9c1fe..f742e82 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/Main.scala @@ -3,7 +3,7 @@ package fr.inrae.metabolomics.p2m2 import fr.inrae.metabolomics.p2m2.`export`.CsvMetabolitesIdentificationFile import fr.inrae.metabolomics.p2m2.builder.{MetaboliteIdentification, PeakIdentification, ScanLoader} import fr.inrae.metabolomics.p2m2.config.ConfigReader -import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification +import fr.inrae.metabolomics.p2m2.output.MetabolitesIdentification import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} import java.io.File @@ -149,7 +149,7 @@ object Main extends App { mzCoreStructure : Double, neutralLoss: Map[String, Double], daughterIons: Map[String, Double] - ): Seq[CsvMetabolitesIdentification] = { + ): Seq[MetabolitesIdentification] = { val listSulfurMetabolites: Seq[PeakIdentification] = ScanLoader. diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala index b35ec63..2473708 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/builder/MetaboliteIdentification.scala @@ -1,6 +1,6 @@ package fr.inrae.metabolomics.p2m2.builder -import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification +import fr.inrae.metabolomics.p2m2.output.MetabolitesIdentification import umich.ms.fileio.filetypes.mzxml.{MZXMLFile, MZXMLIndex} case class MetaboliteIdentification( @@ -12,14 +12,14 @@ case class MetaboliteIdentification( nls : Seq[(String,Double)], dis : Seq[(String,Double)] ) { - def getInfo( p :PeakIdentification,precisionMzh : Int, mzCoreStructure : Double) : Option[CsvMetabolitesIdentification] = p.peaks.nonEmpty match { + def getInfo( p :PeakIdentification,precisionMzh : Int, mzCoreStructure : Double) : Option[MetabolitesIdentification] = p.peaks.nonEmpty match { case true => val mz = p.peaks.map(p2 => (p2.mz*precisionMzh ).round / precisionMzh.toDouble ) val intensities = p.peaks.map(_.intensity) val abundance = p.peaks.map(_.abundance) if ( p.peaks.head.mz >= mzCoreStructure ) - Some(CsvMetabolitesIdentification( + Some(MetabolitesIdentification( mz, intensities, abundance, @@ -28,7 +28,7 @@ case class MetaboliteIdentification( daughterIons = ScanLoader.detectDaughterIons(source,index,start,end,p,dis) )) else - Some(CsvMetabolitesIdentification( + Some(MetabolitesIdentification( mz, intensities, abundance, @@ -45,7 +45,7 @@ case class MetaboliteIdentification( * @param mzCoreStructure minimum size of a metabolite according param family * @return */ - def findDiagnosticIonsAndNeutralLosses(precisionMzh : Int, mzCoreStructure : Double): Seq[CsvMetabolitesIdentification] = { + def findDiagnosticIonsAndNeutralLosses(precisionMzh : Int, mzCoreStructure : Double): Seq[MetabolitesIdentification] = { println("\n== detectNeutralLoss/detectDaughterIons == ") peaks.zipWithIndex diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala index ce485bd..c212b62 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFile.scala @@ -2,13 +2,13 @@ package fr.inrae.metabolomics.p2m2.`export` import fr.inrae.metabolomics.p2m2.config.ConfigReader import fr.inrae.metabolomics.p2m2.database.Chebi -import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification +import fr.inrae.metabolomics.p2m2.output.MetabolitesIdentification import java.io.{BufferedWriter, File, FileWriter} case object CsvMetabolitesIdentificationFile { - def build(list : Seq[CsvMetabolitesIdentification], + def build(list : Seq[MetabolitesIdentification], familyMetabolite : String, configJson : ConfigReader, out: File) : Unit = { if ( list.nonEmpty ) { @@ -49,8 +49,7 @@ case object CsvMetabolitesIdentificationFile { } else bw.write(";") - val c : Int = (metabolitesIdentificationId.neutralLosses.values.flatten.size)+(metabolitesIdentificationId.daughterIons.values.flatten.size) - bw.write(s"$c;") + bw.write(s"${metabolitesIdentificationId.scoreIdentification};") neutralLosses .map( name => metabolitesIdentificationId.neutralLosses.getOrElse(name,None).getOrElse("")) diff --git a/src/main/scala/fr/inrae/metabolomics/p2m2/output/CsvMetabolitesIdentification.scala b/src/main/scala/fr/inrae/metabolomics/p2m2/output/MetabolitesIdentification.scala similarity index 70% rename from src/main/scala/fr/inrae/metabolomics/p2m2/output/CsvMetabolitesIdentification.scala rename to src/main/scala/fr/inrae/metabolomics/p2m2/output/MetabolitesIdentification.scala index 23740f5..0f0be18 100644 --- a/src/main/scala/fr/inrae/metabolomics/p2m2/output/CsvMetabolitesIdentification.scala +++ b/src/main/scala/fr/inrae/metabolomics/p2m2/output/MetabolitesIdentification.scala @@ -1,10 +1,12 @@ package fr.inrae.metabolomics.p2m2.output -case class CsvMetabolitesIdentification( +case class MetabolitesIdentification( mz : Seq[Double], intensity : Seq[Double], abundance : Seq[Double], rt :Double, neutralLosses : Map[String,Option[Double]], daughterIons : Map[String,Option[Double]] - ) + ) { + def scoreIdentification: Int = neutralLosses.values.flatten.size + daughterIons.values.flatten.size +} diff --git a/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala b/src/test/scala/fr/inrae/metabolomics/p2m2/export/MetabolitesIdentificationFileTest$.scala similarity index 85% rename from src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala rename to src/test/scala/fr/inrae/metabolomics/p2m2/export/MetabolitesIdentificationFileTest$.scala index e58c4c4..891af6e 100644 --- a/src/test/scala/fr/inrae/metabolomics/p2m2/export/CsvMetabolitesIdentificationFileTest.scala +++ b/src/test/scala/fr/inrae/metabolomics/p2m2/export/MetabolitesIdentificationFileTest$.scala @@ -2,14 +2,14 @@ package fr.inrae.metabolomics.p2m2.`export` import fr.inrae.metabolomics.p2m2.builder.ScanLoader import fr.inrae.metabolomics.p2m2.config.ConfigReader -import fr.inrae.metabolomics.p2m2.output.CsvMetabolitesIdentification +import fr.inrae.metabolomics.p2m2.output.MetabolitesIdentification import utest.{TestSuite, Tests, test} import java.io.File import scala.io.Source -object CsvMetabolitesIdentificationFileTest extends TestSuite { +object MetabolitesIdentificationFileTest$ extends TestSuite { val v = ScanLoader.read(new File(getClass.getResource("/20181018-037.mzXML").getPath)) val tests: Tests = Tests { @@ -23,7 +23,7 @@ object CsvMetabolitesIdentificationFileTest extends TestSuite { CsvMetabolitesIdentificationFile.build( Seq( - CsvMetabolitesIdentification( + MetabolitesIdentification( mz = Seq(1.0, 1.0), intensity = Seq(1.0, 1.0), abundance = Seq(1.0, 1.0),