Skip to content

Commit

Permalink
save dev. implement interface with bytearray to develop API stream ma… (
Browse files Browse the repository at this point in the history
#72)

* save dev. implement interface with bytearray to develop API stream management

* add test byte array management

* 0.2.2

* remove debug print

* update dependencies

* add test. fix coverage

---------

Co-authored-by: Olivier Filangi <[email protected]>
  • Loading branch information
ofilangi and Olivier Filangi authored Sep 1, 2023
1 parent 8d5ea9b commit bf41e65
Show file tree
Hide file tree
Showing 21 changed files with 266 additions and 133 deletions.
10 changes: 5 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ scmInfo := Some(
)

versionScheme := Some("early-semver")
val static_version = "0.2.1"
val static_version = "0.2.2"
val version_build = scala.util.Properties.envOrElse("PROG_VERSION", static_version )

version := version_build

libraryDependencies ++= Seq(
"com.lihaoyi" %% "utest" % "0.8.1" % Test,
"org.apache.logging.log4j" % "log4j-to-slf4j" % "2.20.0" % Test,
"org.slf4j" % "slf4j-simple" % "2.0.5" % Test,
"org.apache.poi" % "poi-ooxml" % "5.2.2",
"org.slf4j" % "slf4j-simple" % "2.0.7" % Test,
"org.apache.poi" % "poi-ooxml" % "5.2.3",
"com.github.scopt" %% "scopt" % "4.1.0",
"com.lihaoyi" %% "upickle" % "3.0.0",
"org.scala-lang.modules" %% "scala-xml" % "2.1.0" % Provided,
"com.lihaoyi" %% "upickle" % "3.1.2",
"org.scala-lang.modules" %% "scala-xml" % "2.2.0" % Provided,
)

// Coverage
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ case class GCMSOutputFiles2IsocorInput(resolution : Int = 2000, separator_name :
println(gcmsInputFiles.mkString("\n"))

gcmsInputFiles.map(
fileName => GCMSParser.parse(fileName)
fileName => GCMSParser.parseFile(fileName)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ case class MassLynxOutput2IsocorInput(
println(inputFiles.mkString("\n"))

inputFiles.map(
fileName => QuantifySummaryReportMassLynxParser.parse(fileName)
fileName => QuantifySummaryReportMassLynxParser.parseFile(fileName)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ case class OpenLabCDS2CompilCsv(target_head : String ) {
println(inputFiles.mkString("\n"))

inputFiles.map(
fileName => OpenLabCDSParser.parse(fileName)
fileName => OpenLabCDSParser.parseFile(fileName)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,6 @@ object FormatConversions {
}

implicit def XcaliburToGenericP2M2(x: Xcalibur) : GenericP2M2 = {
println("***********************XcaliburToGenericP2M2***************************")
GenericP2M2(
samples = x.results
.flatMap(
Expand All @@ -258,9 +257,7 @@ object FormatConversions {
//Tue Jun 20 14:53:08 CEST 2017
ZonedDateTime.parse(d, dtf) }) match {
case Success(v) => Some(v.format(DateTimeFormatter.ofPattern(formatGenericP2M2)))
case Failure(_) =>
System.err.println(s"Can't not apply conversion with FormatConversions.formatDate1 [$d]")
formatDateWithLocalDateTime(Some(d),formatDateXcalibur2)
case Failure(_) => formatDateWithLocalDateTime(Some(d),formatDateXcalibur2)
}
case _ => None
}) ,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package fr.inrae.metabolomics.p2m2.parser

import scala.io.Codec

trait FormatSniffer {
def extensionIsCompatible(filename : String) : Boolean
def sniffFile(filename : String) : Boolean
def sniffByteArray(content: Array[Byte], encode : Codec = Codec("ISO-8859-1") ): Boolean
def sniffFile(filename : String,encode : Codec = Codec("ISO-8859-1") ) : Boolean
}
46 changes: 30 additions & 16 deletions src/main/scala/fr/inrae/metabolomics/p2m2/parser/GCMSParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -104,37 +104,51 @@ object GCMSParser extends Parser[GCMS] with FormatSniffer {
)
}

def parse(filename : String) : GCMS = {
val source = Source.fromFile(filename)(Codec("ISO-8859-1"))
def parse(source : Source): GCMS = {
val lines = source.getLines()
val ret = get(
filename,
source.descr,
lines.toList
.map( _.trim )
.filter( _.nonEmpty)
.filter( ! _.startsWith("#") )
.map(_.trim)
.filter(_.nonEmpty)
.filter(!_.startsWith("#"))
)
source.close()
ret
}

/**
 * Parses a GCMS report whose content is already held in memory.
 *
 * @param content raw bytes of the report
 * @param encode  codec used to decode the bytes (ISO-8859-1 by default)
 * @return the GCMS object produced by `parse`
 */
def parseByteArray(content: Array[Byte], encode : Codec = Codec("ISO-8859-1")) : GCMS = {
  val inMemorySource = Source.fromBytes(content)(encode)
  parse(inMemorySource)
}

/**
 * Parses a GCMS report stored on disk.
 *
 * @param filename path of the file to read
 * @param encode   codec used to decode the file (ISO-8859-1 by default)
 * @return the GCMS object produced by `parse`
 */
def parseFile(filename: String, encode : Codec = Codec("ISO-8859-1")) : GCMS = {
  val fileSource = Source.fromFile(filename)(encode)
  // `parse` closes the source only after a successful run; the finally block
  // guarantees the file handle is released when reading or parsing throws.
  // Closing an already closed file-backed source is harmless.
  try parse(fileSource)
  finally fileSource.close()
}


/**
 * Tells whether the last dot-separated segment of `filename` is non-blank.
 *
 * NOTE: a name without any dot is accepted as well, because the whole name
 * is then its own last segment.
 *
 * @param filename name (or path) of the candidate file
 * @return true when the last segment contains at least one non-whitespace character
 */
override def extensionIsCompatible(filename: String): Boolean =
  filename.split("\\.").lastOption.exists(_.trim.nonEmpty)

override def sniffFile(filename: String): Boolean = {
Try({
val source = Source.fromFile(filename)(Codec("ISO-8859-1"))
val lines = source.getLines().slice(0,20).toList
source.close()
Try(parseHeader(lines)) match {
case Success(m) if m.nonEmpty => true
case _ => false
}
}) match {
/**
 * Checks whether the first 20 lines of `source` yield a non-empty header.
 *
 * The source is always closed, even when reading its lines fails.
 *
 * @param source source to inspect; it is consumed and closed by this call
 * @return true when `parseHeader` succeeds on the leading lines and its result is non-empty
 */
private def testHeader(source : Source) : Boolean = {
  val trunkLines =
    try source.getLines().slice(0, 20).toList
    finally source.close() // release the handle on the failure path too
  Try(parseHeader(trunkLines)) match {
    case Success(m) if m.nonEmpty => true
    case _ => false
  }
}

/**
 * Sniffs an in-memory content: true when its header is recognised by `testHeader`.
 * Any non-fatal error raised while decoding or inspecting the content yields false.
 *
 * @param content raw bytes to inspect
 * @param encode  codec used to decode the bytes
 */
override def sniffByteArray(content: Array[Byte], encode : Codec): Boolean =
  Try(testHeader(Source.fromBytes(content)(encode))).getOrElse(false)

override def sniffFile(filename: String, encode : Codec ): Boolean = {
Try(testHeader(Source.fromFile(filename)(encode))) match {
case Success(v) => v
case Failure(_) => false
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,22 @@ object OpenLabCDSParser extends Parser[OpenLabCDS] with FormatSniffer {
results = parseResults(toParse)
)
}

def parse(filename : String) : OpenLabCDS = {
val source = Source.fromFile(filename)(Codec("ISO-8859-1"))
def parse(source : Source): OpenLabCDS = {
val lines = source.getLines()
val ret = get(
filename,
source.descr,
lines.toList
.map( _.trim )
.filter( _.nonEmpty)
.map(_.trim)
.filter(_.nonEmpty)
)
source.close()
ret
}
/**
 * Parses an OpenLabCDS report whose content is already held in memory.
 *
 * @param content raw bytes of the report
 * @param encode  codec used to decode the bytes (ISO-8859-1 by default)
 * @return the OpenLabCDS object produced by `parse`
 */
def parseByteArray(content: Array[Byte], encode : Codec = Codec("ISO-8859-1")) : OpenLabCDS = {
  val inMemorySource = Source.fromBytes(content)(encode)
  parse(inMemorySource)
}

/**
 * Parses an OpenLabCDS report stored on disk.
 *
 * @param filename path of the file to read
 * @param encode   codec used to decode the file (ISO-8859-1 by default)
 * @return the OpenLabCDS object produced by `parse`
 */
def parseFile(filename : String, encode : Codec = Codec("ISO-8859-1")) : OpenLabCDS = {
  val fileSource = Source.fromFile(filename)(encode)
  // `parse` closes the source only after a successful run; the finally block
  // guarantees the file handle is released when reading or parsing throws.
  // Closing an already closed file-backed source is harmless.
  try parse(fileSource)
  finally fileSource.close()
}

override def extensionIsCompatible(filename: String): Boolean = {
filename.split("\\.").lastOption match {
Expand All @@ -136,16 +139,24 @@ object OpenLabCDSParser extends Parser[OpenLabCDS] with FormatSniffer {
}
}

override def sniffFile(filename: String): Boolean = {
Try({
val source = Source.fromFile(filename)(Codec("ISO-8859-1"))
val lines = source.getLines().slice(0,20).toList
source.close()
Try(parseHeader(lines)) match {
case Success(m) if m.nonEmpty => true
case _ => false
}
}) match {
/**
 * Checks whether the first 20 lines of `source` yield a non-empty header.
 *
 * The source is always closed, even when reading its lines fails.
 *
 * @param source source to inspect; it is consumed and closed by this call
 * @return true when `parseHeader` succeeds on the leading lines and its result is non-empty
 */
private def testHeader(source: Source): Boolean = {
  val trunkLines =
    try source.getLines().slice(0, 20).toList
    finally source.close() // release the handle on the failure path too
  Try(parseHeader(trunkLines)) match {
    case Success(m) if m.nonEmpty => true
    case _ => false
  }
}

/**
 * Sniffs an in-memory content: true when its header is recognised by `testHeader`.
 * Any non-fatal error raised while decoding or inspecting the content yields false.
 *
 * @param content raw bytes to inspect
 * @param encode  codec used to decode the bytes
 */
override def sniffByteArray(content: Array[Byte], encode: Codec): Boolean =
  Try(testHeader(Source.fromBytes(content)(encode))).getOrElse(false)

override def sniffFile(filename: String, encode: Codec): Boolean = {
Try(testHeader(Source.fromFile(filename)(encode))) match {
case Success(v) => v
case Failure(_) => false
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package fr.inrae.metabolomics.p2m2.parser

import scala.io.Codec

trait Parser[OutputFormat] {
def parse(filename : String) : OutputFormat
def parseByteArray(content: Array[Byte], encode : Codec = Codec("ISO-8859-1")) : OutputFormat
def parseFile(filename : String, encode : Codec = Codec("ISO-8859-1")) : OutputFormat
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,27 @@ package fr.inrae.metabolomics.p2m2.parser
import fr.inrae.metabolomics.p2m2.format.ms.MassSpectrometryResultSet

case object ParserManager {
def buildMassSpectrometryObject(path : String) : Option[MassSpectrometryResultSet] = path match {
case _ if GCMSParser.sniffFile(path) =>
Some(GCMSParser.parse(path))
case _ if OpenLabCDSParser.sniffFile(path) =>
Some(OpenLabCDSParser.parse(path))
case _ if QuantifySummaryReportMassLynxParser.sniffFile(path) =>
Some(QuantifySummaryReportMassLynxParser.parse(path))
case _ if XcaliburXlsParser.sniffFile(path) =>
Some(XcaliburXlsParser.parse(path))
/**
 * Builds a mass spectrometry result set from a file on disk.
 *
 * The parsers are tried in a fixed order (GCMS, OpenLabCDS, MassLynx quantify
 * summary report, Xcalibur); the first one whose `sniffFile` accepts the file
 * is used to parse it.
 *
 * @param filepath path of the file to analyse
 * @return the parsed result set, or None when no parser recognises the file
 */
def buildMassSpectrometryObject(filepath : String) : Option[MassSpectrometryResultSet] =
  if (GCMSParser.sniffFile(filepath))
    Some(GCMSParser.parseFile(filepath))
  else if (OpenLabCDSParser.sniffFile(filepath))
    Some(OpenLabCDSParser.parseFile(filepath))
  else if (QuantifySummaryReportMassLynxParser.sniffFile(filepath))
    Some(QuantifySummaryReportMassLynxParser.parseFile(filepath))
  else if (XcaliburXlsParser.sniffFile(filepath))
    Some(XcaliburXlsParser.parseFile(filepath))
  else
    None

/**
 * Builds a mass spectrometry result set from a content held in memory.
 *
 * The parsers are tried in a fixed order (GCMS, OpenLabCDS, MassLynx quantify
 * summary report, Xcalibur); the first one whose `sniffByteArray` accepts the
 * content is used to parse it.
 *
 * @param contentFile raw bytes of the file to analyse
 * @return the parsed result set, or None when no parser recognises the content
 */
def buildMassSpectrometryObject(contentFile: Array[Byte]): Option[MassSpectrometryResultSet] =
  if (GCMSParser.sniffByteArray(contentFile))
    Some(GCMSParser.parseByteArray(contentFile))
  else if (OpenLabCDSParser.sniffByteArray(contentFile))
    Some(OpenLabCDSParser.parseByteArray(contentFile))
  else if (QuantifySummaryReportMassLynxParser.sniffByteArray(contentFile))
    Some(QuantifySummaryReportMassLynxParser.parseByteArray(contentFile))
  else if (XcaliburXlsParser.sniffByteArray(contentFile))
    Some(XcaliburXlsParser.parseByteArray(contentFile))
  else
    None
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import fr.inrae.metabolomics.p2m2
import fr.inrae.metabolomics.p2m2.format.ms.{QuantifyCompoundSummaryReportMassLynx, QuantifySampleSummaryReportMassLynx, QuantifySummaryReportMassLynx}

import scala.io.{Codec, Source}
import scala.util.{Failure, Success, Try}
object QuantifySummaryReportMassLynxParser
extends Parser[QuantifySummaryReportMassLynx]
with FormatSniffer {
Expand Down Expand Up @@ -66,7 +67,7 @@ object QuantifySummaryReportMassLynxParser
})
}

def parseArrayCompound[HeaderField <: Enumeration](t:HeaderField, toParse : Seq[String] ) :Seq[Map[HeaderField#Value,String]] = {
private def parseArrayCompound[HeaderField <: Enumeration](t:HeaderField, toParse : Seq[String] ) :Seq[Map[HeaderField#Value,String]] = {
toParse.filter(_.trim.nonEmpty).find( x => x.trim.startsWith("Name") || x.trim.startsWith("#")) match {
case Some(headerString) =>
/* first value of array in the number corresponding to the injection*/
Expand Down Expand Up @@ -98,51 +99,61 @@ object QuantifySummaryReportMassLynxParser
)
}

def getSampleSummaryReport(filename: String, toParse: Seq[String]): QuantifySampleSummaryReportMassLynx = {
/**
 * Assembles a sample summary report from the lines of a MassLynx export.
 *
 * @param filename value stored as the report origin
 * @param toParse  lines of the report
 * @return the report holding the parsed header and the results grouped by sample
 */
private def getSampleSummaryReport(filename: String, toParse: Seq[String]): QuantifySampleSummaryReportMassLynx = {
  val reportHeader = parseHeader(toParse)
  val samples = parseResultsByElement(QuantifySampleSummaryReportMassLynx.HeaderField, toParse)
  QuantifySampleSummaryReportMassLynx(
    origin = filename,
    header = reportHeader,
    resultsBySample = samples
  )
}

def parse(filename : String) : QuantifySummaryReportMassLynx = {
val s = Source.fromFile(filename)(Codec("ISO-8859-1"))
val lines = s.getLines().toList
s.close()
def parse(source : Source): QuantifySummaryReportMassLynx = {
val lines = source.getLines().toList
source.close()

"""Sample\s+Name:""".r.findFirstMatchIn(lines.slice(0,10).mkString("\n")) match {
"""Sample\s+Name:""".r.findFirstMatchIn(lines.slice(0, 10).mkString("\n")) match {
case Some(_) => getSampleSummaryReport(
filename,
source.descr,
lines
.map(_.trim)
.filter(_.nonEmpty)
)
case None => getCompoundSummaryReport(
filename,
source.descr,
lines
.map(_.trim)
.filter(_.nonEmpty)
)
}
}

/**
 * Parses a MassLynx quantify summary report whose content is already held in memory.
 *
 * @param content raw bytes of the report
 * @param encode  codec used to decode the bytes (ISO-8859-1 by default)
 * @return the report produced by `parse`
 */
def parseByteArray(content: Array[Byte], encode : Codec = Codec("ISO-8859-1")) : QuantifySummaryReportMassLynx = {
  val inMemorySource = Source.fromBytes(content)(encode)
  parse(inMemorySource)
}
/**
 * Parses a MassLynx quantify summary report stored on disk.
 *
 * @param filename path of the file to read
 * @param encode   codec used to decode the file (ISO-8859-1 by default)
 * @return the report produced by `parse`
 */
def parseFile(filename : String, encode : Codec = Codec("ISO-8859-1")) : QuantifySummaryReportMassLynx = {
  val fileSource = Source.fromFile(filename)(encode)
  // `parse` closes the source only once all lines have been read; the finally
  // block guarantees the file handle is released when reading throws.
  // Closing an already closed file-backed source is harmless.
  try parse(fileSource)
  finally fileSource.close()
}

/**
 * Tells whether the last dot-separated segment of `filename` is non-blank.
 *
 * NOTE: a name without any dot is accepted as well, because the whole name
 * is then its own last segment.
 *
 * @param filename name (or path) of the candidate file
 * @return true when the last segment contains at least one non-whitespace character
 */
override def extensionIsCompatible(filename: String): Boolean =
  filename.split("\\.").lastOption.exists(_.trim.nonEmpty)

override def sniffFile(filename: String): Boolean = {
try {

val source = Source.fromFile(filename)(Codec("ISO-8859-1"))
val lines = source.getLines().slice(0,20).toList
source.close()
parseHeader(lines).dateStr.isDefined
} catch {
case e : Throwable => System.err.println(e.toString) ; false
/**
 * Checks whether the header parsed from the first 20 lines of `source`
 * carries a date string.
 *
 * The source is always closed, even when reading its lines fails.
 * Errors raised by `parseHeader` are propagated to the caller.
 *
 * @param source source to inspect; it is consumed and closed by this call
 * @return true when the parsed header has `dateStr` defined
 */
private def testHeader(source: Source): Boolean = {
  val trunkLines =
    try source.getLines().slice(0, 20).toList
    finally source.close() // release the handle on the failure path too
  parseHeader(trunkLines).dateStr.isDefined
}
/**
 * Sniffs an in-memory content: true when its header is recognised by `testHeader`.
 * Any non-fatal error raised while decoding or inspecting the content yields false.
 *
 * @param content raw bytes to inspect
 * @param encode  codec used to decode the bytes
 */
override def sniffByteArray(content: Array[Byte], encode: Codec): Boolean =
  Try(testHeader(Source.fromBytes(content)(encode))).getOrElse(false)
/**
 * Sniffs a file on disk: true when its header is recognised by `testHeader`.
 * Any non-fatal error (unreadable file, decoding or header failure) yields false.
 *
 * @param filename path of the file to inspect
 * @param encode   codec used to decode the file
 */
override def sniffFile(filename: String, encode: Codec): Boolean =
  Try(testHeader(Source.fromFile(filename)(encode))).getOrElse(false)
}
Loading

0 comments on commit bf41e65

Please sign in to comment.