diff --git a/build.sbt b/build.sbt index 2ba5a9c..604b863 100644 --- a/build.sbt +++ b/build.sbt @@ -51,17 +51,17 @@ publishLocalConfiguration := publishLocalConfiguration.value.withOverwrite(true) pomIncludeRepository := { _ => false } publishMavenStyle := true -lazy val rdf4jVersion = "4.0.2" -lazy val slf4jVersion = "1.7.36" -lazy val uTestVersion = "0.7.11" -val sparkVersion = "3.1.3" +lazy val rdf4jVersion = "4.2.3" +lazy val slf4jVersion = "2.0.5" +lazy val uTestVersion = "0.8.1" +val sparkVersion = "3.3.2" crossScalaVersions := supportedScalaVersions libraryDependencies ++= Seq( "org.apache.spark" %% "spark-sql" % sparkVersion % "provided,test", - "com.github.scopt" %% "scopt" % "4.0.1", + "com.github.scopt" %% "scopt" % "4.1.0", "org.eclipse.rdf4j" % "rdf4j-storage" % rdf4jVersion, "com.github.jsonld-java" % "jsonld-java" % "0.13.4", "org.slf4j" % "slf4j-api" % slf4jVersion, diff --git a/src/main/scala/fr/inrae/metabolomics/services/ServiceRdfDatabaseDeployment.scala b/src/main/scala/fr/inrae/metabolomics/services/ServiceRdfDatabaseDeployment.scala index 083b9e3..faac955 100755 --- a/src/main/scala/fr/inrae/metabolomics/services/ServiceRdfDatabaseDeployment.scala +++ b/src/main/scala/fr/inrae/metabolomics/services/ServiceRdfDatabaseDeployment.scala @@ -137,7 +137,7 @@ case object ServiceRdfDatabaseDeployment extends App { } def buildScript( - files: Seq[String], + filesIn: Seq[String], output: File, category: String, databaseName: String, @@ -150,6 +150,7 @@ case object ServiceRdfDatabaseDeployment extends App { startDate : String ): Unit = { + val files = filesIn.map(_.trim()).filter(_.nonEmpty).filter(_!="\\") new File(output.getPath).delete() val bw = new BufferedWriter(new FileWriter(new File(output.getPath))) @@ -159,6 +160,7 @@ case object ServiceRdfDatabaseDeployment extends App { val dirProvData = s"${rootPathDatabasesHdfsCluster}/prov/" val wgetOpt = "--spider -nv -r -nd --no-parent -e robots=off" bw.write("#!/bin/bash\n") + bw.write("set -e\n") bw.write(s"$hdfs dfs -mkdir -p ${dirData}\n") bw.write(s"$hdfs dfs -mkdir -p ${dirAskOmicsAbstraction}\n") @@ -171,9 +173,16 @@ case object ServiceRdfDatabaseDeployment extends App { ).foreach( x => { /* get files names */ - bw.write("FILES=$(wget "+s"$wgetOpt -A "+ "\"$(basename "+x+")\"" + - " $(dirname "+x+")/ 2>&1 | egrep \"200[[:blank:]]+OK$\" | awk '{print $4}')\n") - + // only if joker "*." on the base name + if (x.split("/").lastOption.exists(_.contains("*"))) { + bw.write("## USING JOKER METHOD : " + x + "\n") + bw.write("FILES=$(wget " + s"$wgetOpt -A " + "\"$(basename " + x + ")\"" + + " $(dirname " + x + ")/ 2>&1 | egrep \"200[[:blank:]]+OK$\" | awk '{print $4}')\n") + bw.write("[ -z \"$FILES\" ] && echo \"Can not find FILES with the special character '*' " + + "and this url=" + x + "\" && exit 1\n") + } else { + bw.write("FILES=\""+x+"\"\n") + } bw.write("for file in $FILES\n") bw.write("do\n") bw.write("if [ \"${file: -3}\" == \".gz\" ]; then\n") @@ -188,27 +197,26 @@ case object ServiceRdfDatabaseDeployment extends App { ) files.filter( - x => ! x.matches("^(http|https|ftp)://.*$") - ).foreach( - x => x match { - case file if file.endsWith(".gz") => { - bw.write("file_expr=$(basename "+file+")\n") - bw.write("for file in $(ls $file_expr 2>/dev/null)\n") - bw.write("do\n") - bw.write("gunzip -c $file "+s"| $hdfs dfs -put - " + - s"${dirData}"+"/$(basename ${file%.gz})\n") - bw.write("done\n") - } - case _ => { - bw.write(s"$hdfs"+" dfs -put -f $(basename "+x+") "+s"${dirData}\n") - } + x => !x.matches("^(http|https|ftp)://.*$") + ).foreach { + case file if file.endsWith(".gz") => { + bw.write("file_expr=$(basename " + file + ")\n") + bw.write("for file in $(ls $file_expr 2>/dev/null)\n") + bw.write("do\n") + bw.write("gunzip -c $file " + s"| $hdfs dfs -put - " + + s"${dirData}" + "/$(basename ${file%.gz})\n") + bw.write("done\n") + } + case x => + bw.write(s"$hdfs" + " dfs -put -f $(basename " + x + ") " + s"${dirData}\n") - }) + } - abstraction_askomics match { + abstraction_askomics.map(_.trim()).filter(_.nonEmpty) match { case Some(file) if file.endsWith(".ttl") => if ( file.matches("^(http|https|ftp)://.*$")) bw.write(s"wget $file\n") + bw.write(s"$hdfs dfs -put -f "+"$("+s"basename $file) " + s"${dirAskOmicsAbstraction}/"+"$("+s"basename $file)\n") case Some( f ) => System.err.println(s"Can not manage this Askomics extension file ${f}")