Skip to content

Commit

Permalink
Merge pull request #61 from p2m2/fixGithubDownload_60
Browse files (browse the repository at this point in the history)
fix simple url to download
  • Loading branch information
ofilangi authored Jun 19, 2023
2 parents 8d6d2fb + 98ef609 commit a788f47
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 25 deletions.
10 changes: 5 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -51,17 +51,17 @@ publishLocalConfiguration := publishLocalConfiguration.value.withOverwrite(true)
pomIncludeRepository := { _ => false }
publishMavenStyle := true

lazy val rdf4jVersion = "4.0.2"
lazy val slf4jVersion = "1.7.36"
lazy val uTestVersion = "0.7.11"
val sparkVersion = "3.1.3"
lazy val rdf4jVersion = "4.2.3"
lazy val slf4jVersion = "2.0.5"
lazy val uTestVersion = "0.8.1"
val sparkVersion = "3.3.2"

crossScalaVersions := supportedScalaVersions


libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided,test",
"com.github.scopt" %% "scopt" % "4.0.1",
"com.github.scopt" %% "scopt" % "4.1.0",
"org.eclipse.rdf4j" % "rdf4j-storage" % rdf4jVersion,
"com.github.jsonld-java" % "jsonld-java" % "0.13.4",
"org.slf4j" % "slf4j-api" % slf4jVersion,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ case object ServiceRdfDatabaseDeployment extends App {
}

def buildScript(
files: Seq[String],
filesIn: Seq[String],
output: File,
category: String,
databaseName: String,
Expand All @@ -150,6 +150,7 @@ case object ServiceRdfDatabaseDeployment extends App {
startDate : String
): Unit = {

val files = filesIn.map(_.trim()).filter(_.nonEmpty).filter(_!="\\")
new File(output.getPath).delete()

val bw = new BufferedWriter(new FileWriter(new File(output.getPath)))
Expand All @@ -159,6 +160,7 @@ case object ServiceRdfDatabaseDeployment extends App {
val dirProvData = s"${rootPathDatabasesHdfsCluster}/prov/"
val wgetOpt = "--spider -nv -r -nd --no-parent -e robots=off"
bw.write("#!/bin/bash\n")
bw.write("set -e\n")

bw.write(s"$hdfs dfs -mkdir -p ${dirData}\n")
bw.write(s"$hdfs dfs -mkdir -p ${dirAskOmicsAbstraction}\n")
Expand All @@ -171,9 +173,16 @@ case object ServiceRdfDatabaseDeployment extends App {
).foreach(
x => {
/* get files names */
bw.write("FILES=$(wget "+s"$wgetOpt -A "+ "\"$(basename "+x+")\"" +
" $(dirname "+x+")/ 2>&1 | egrep \"200[[:blank:]]+OK$\" | awk '{print $4}')\n")

// use the wildcard lookup only when the base name (last path segment) contains "*"
if (x.split("/").lastOption.exists(_.contains("*"))) {
bw.write("## USING JOKER METHOD : " + x + "\n")
bw.write("FILES=$(wget " + s"$wgetOpt -A " + "\"$(basename " + x + ")\"" +
" $(dirname " + x + ")/ 2>&1 | egrep \"200[[:blank:]]+OK$\" | awk '{print $4}')\n")
bw.write("[ -z \"$FILES\" ] && echo \"Can not find FILES with the special character '*' " +
"and this url=" + x + "\" && exit 1\n")
} else {
bw.write("FILES=\""+x+"\"\n")
}
bw.write("for file in $FILES\n")
bw.write("do\n")
bw.write("if [ \"${file: -3}\" == \".gz\" ]; then\n")
Expand All @@ -188,27 +197,26 @@ case object ServiceRdfDatabaseDeployment extends App {
)

files.filter(
x => ! x.matches("^(http|https|ftp)://.*$")
).foreach(
x => x match {
case file if file.endsWith(".gz") => {
bw.write("file_expr=$(basename "+file+")\n")
bw.write("for file in $(ls $file_expr 2>/dev/null)\n")
bw.write("do\n")
bw.write("gunzip -c $file "+s"| $hdfs dfs -put - " +
s"${dirData}"+"/$(basename ${file%.gz})\n")
bw.write("done\n")
}
case _ => {
bw.write(s"$hdfs"+" dfs -put -f $(basename "+x+") "+s"${dirData}\n")
}
x => !x.matches("^(http|https|ftp)://.*$")
).foreach {
case file if file.endsWith(".gz") => {
bw.write("file_expr=$(basename " + file + ")\n")
bw.write("for file in $(ls $file_expr 2>/dev/null)\n")
bw.write("do\n")
bw.write("gunzip -c $file " + s"| $hdfs dfs -put - " +
s"${dirData}" + "/$(basename ${file%.gz})\n")
bw.write("done\n")
}
case x =>
bw.write(s"$hdfs" + " dfs -put -f $(basename " + x + ") " + s"${dirData}\n")

})
}

abstraction_askomics match {
abstraction_askomics.map(_.trim()).filter(_.nonEmpty) match {
case Some(file) if file.endsWith(".ttl") =>
if ( file.matches("^(http|https|ftp)://.*$"))
bw.write(s"wget $file\n")

bw.write(s"$hdfs dfs -put -f "+"$("+s"basename $file) " +
s"${dirAskOmicsAbstraction}/"+"$("+s"basename $file)\n")
case Some( f ) => System.err.println(s"Can not manage this Askomics extension file ${f}")
Expand Down

0 comments on commit a788f47

Please sign in to comment.