Skip to content

Commit

Permalink
Merge pull request #53 from p2m2/maven_dependency
Browse files Browse the repository at this point in the history
Maven dependency
  • Loading branch information
ofilangi authored Jun 23, 2022
2 parents f1d262f + 1efcbfb commit 9dcc6c2
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 25 deletions.
9 changes: 5 additions & 4 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
lazy val scala212 = "2.12.16"
lazy val scala213 = "2.13.8"
lazy val supportedScalaVersions = List(scala212, scala213)
lazy val supportedScalaVersions = List(scala212)

scalaVersion := scala213
scalaVersion := scala212
name := "service-rdf-database-deployment"
organization := "com.github.p2m2"
organizationName := "p2m2"
Expand All @@ -22,7 +21,7 @@ developers := List(
val static_version_build = "0.4.1"
val version_build = scala.util.Properties.envOrElse("VERSION", static_version_build)

version := "test"
version := version_build

credentials += {
val realm = scala.util.Properties.envOrElse("REALM_CREDENTIAL", "" )
Expand Down Expand Up @@ -54,11 +53,13 @@ publishMavenStyle := true
lazy val rdf4jVersion = "4.0.2"
lazy val slf4jVersion = "1.7.36"
lazy val uTestVersion = "0.7.11"
val sparkVersion = "3.1.3"

crossScalaVersions := supportedScalaVersions


libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-sql" % sparkVersion % "provided,test",
"com.github.scopt" %% "scopt" % "4.0.1",
"org.eclipse.rdf4j" % "rdf4j-storage" % rdf4jVersion,
"com.github.jsonld-java" % "jsonld-java" % "0.13.4",
Expand Down
98 changes: 77 additions & 21 deletions src/main/scala/fr/inrae/semantic_web/ProvenanceBuilder.scala
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package fr.inrae.semantic_web

import org.apache.spark.sql.SparkSession
import org.eclipse.rdf4j.model.util.ModelBuilder
import org.eclipse.rdf4j.model.util.Values.literal
import org.eclipse.rdf4j.model.vocabulary.{DCAT, DCTERMS, PROV, RDF, XSD}
import org.eclipse.rdf4j.model.vocabulary._
import org.eclipse.rdf4j.rio.{RDFFormat, Rio, WriterConfig}

import java.io.StringWriter
import java.text.SimpleDateFormat
import java.util.Date

case object ProvenanceBuilder {
Expand All @@ -18,12 +20,84 @@ case object ProvenanceBuilder {
"" -> "http://www.metabohub.fr/msd#"
)

val formatterXsd = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss")

val builder : ModelBuilder = new ModelBuilder()

mapPrefix map { case (k,v) => {
builder.setNamespace(k,v)
}}

val baseUrlSparkHistory : String ="https://spark.metabolomics-datalake.ara.inrae.fr/history"


/** Serialises the current model of `builder` into a string, using the RDF
  * syntax selected by `extension` (Turtle by default).
  *
  * @param builder   model builder whose accumulated statements are serialised
  * @param extension syntax key: "jsonld", "ttl", "trig", "nt", "n3" or "rdf"
  * @return the serialised RDF document
  * @throws IllegalArgumentException when `extension` is not a supported key
  */
def getStringFromModelBuilder(builder : ModelBuilder,extension:String="ttl") : String = {
  // Mapping from extension key to RDF4J writer format.
  val formatsByExtension: Map[String, RDFFormat] = Map(
    "jsonld" -> RDFFormat.JSONLD,
    "ttl"    -> RDFFormat.TURTLE,
    "trig"   -> RDFFormat.TRIG,
    "nt"     -> RDFFormat.NTRIPLES,
    "n3"     -> RDFFormat.N3,
    "rdf"    -> RDFFormat.RDFXML
  )

  val selectedFormat: RDFFormat = formatsByExtension.getOrElse(
    extension,
    throw new IllegalArgumentException(s"Unknown extension : $extension ")
  )

  val writerConfig : WriterConfig = new WriterConfig()
  // config.set(BasicWriterSettings.PRETTY_PRINT, true)

  val output = new StringWriter()
  Rio.write(builder.build(), output, selectedFormat, writerConfig)
  output.toString()
}

/** Records provenance for a Spark-produced dataset as PROV/DCAT triples and
  * returns the serialised model (Turtle, via getStringFromModelBuilder's
  * default extension).
  *
  * Three subjects are written:
  *   - the project URL  (prov:Entity / dcat:Dataset),
  *   - the release URL  (prov:Entity / dcat:Distribution),
  *   - the Spark-history URL of this application (prov:Activity).
  *
  * NOTE(review): statements go into the object-level mutable `builder`, so
  * repeated calls accumulate triples from earlier calls into the same model —
  * confirm this accumulation is intended.
  *
  * @param projectUrl base GitLab URL of the datalake project group
  * @param category   dataset category (first path segment under projectUrl)
  * @param database   database name (second path segment)
  * @param release    release identifier appended after the database path
  * @param startDate  activity start time, serialised as xsd:dateTime
  * @param spark      running session; supplies applicationId and sparkUser
  * @return the RDF document serialised from the shared builder
  */
def provSparkSubmit(
projectUrl : String = "https://services.pfem.clermont.inrae.fr/gitlab/metabosemdatalake",
category : String,
database : String,
release : String,
startDate:Date,
spark : SparkSession) : String = {

// Derived IRIs used as the three subjects below.
val ciProjectUrl = projectUrl+"/"+category+"/"+database
val ciPipelineUrl = baseUrlSparkHistory+"/"+spark.sparkContext.applicationId
val urlRelease = projectUrl+"/"+category+"/"+database + "/" + release

// Project/database is modelled as both a prov:Entity and a dcat:Dataset.
builder
.subject(s"$ciProjectUrl")
.add(RDF.TYPE, PROV.ENTITY)
.add(RDF.TYPE, DCAT.DATASET)
.add(DCTERMS.TITLE,database)
.add(DCTERMS.DESCRIPTION,s"Category $category / Database $database" )
.add(DCTERMS.MODIFIED,literal(formatterXsd.format(startDate),XSD.DATETIME) )
.add(PROV.WAS_GENERATED_BY, spark.sparkContext.sparkUser)
.add(DCAT.DISTRIBUTION,s"$urlRelease")
/*
Release is a Prov:Entity
*/

// Release is also a dcat:Distribution, generated by the pipeline activity.
builder
.subject(s"$urlRelease")
.add(RDF.TYPE, PROV.ENTITY)
.add(RDF.TYPE, DCAT.DISTRIBUTION)
.add(DCTERMS.TITLE,release)
.add(DCTERMS.MODIFIED,literal(formatterXsd.format(startDate),XSD.DATETIME) )
.add(PROV.WAS_GENERATED_BY, s"$ciPipelineUrl")
.add(DCAT.ACCESS_URL,s"hdfs://rdf/${category}/${database}/${release}")

// End time is sampled here, when triple construction finishes.
val endString : String = formatterXsd.format(new Date())

// The Spark run itself is the prov:Activity linking project and release.
builder
.subject(s"$ciPipelineUrl")
.add(RDF.TYPE, PROV.ACTIVITY)
.add(PROV.USED,s"$ciProjectUrl")
.add(PROV.STARTED_AT_TIME,literal(formatterXsd.format(startDate),XSD.DATETIME))
.add(PROV.ENDED_AT_TIME,literal(endString,XSD.DATETIME))

getStringFromModelBuilder(builder)

}

def build(
ciProjectUrl:String,
ciPipelineUrl:String,
Expand Down Expand Up @@ -58,7 +132,7 @@ case object ProvenanceBuilder {
.add(PROV.WAS_GENERATED_BY, s"$ciPipelineUrl")
.add(DCAT.ACCESS_URL,s"hdfs://rdf/${category}/${database}/${release}")

val endString : String = new java.text.SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())
val endString : String = formatterXsd.format(new Date())

builder
.subject(s"$ciPipelineUrl")
Expand All @@ -67,24 +141,6 @@ case object ProvenanceBuilder {
.add(PROV.STARTED_AT_TIME,literal(startDate,XSD.DATETIME))
.add(PROV.ENDED_AT_TIME,literal(endString,XSD.DATETIME))


val config : WriterConfig = new WriterConfig()
// config.set(BasicWriterSettings.PRETTY_PRINT, true)

val stringWriter = new StringWriter()

val format : RDFFormat = extension match {
case "jsonld" => RDFFormat.JSONLD
case "ttl" => RDFFormat.TURTLE
case "trig" => RDFFormat.TRIG
case "nt" => RDFFormat.NTRIPLES
case "n3" => RDFFormat.N3
case "rdf" => RDFFormat.RDFXML
case _ => throw new IllegalArgumentException(s"Unknown extension : $extension ")
}

Rio.write(builder.build(), stringWriter, format, config)

stringWriter.toString()
getStringFromModelBuilder(builder,extension)
}
}

0 comments on commit 9dcc6c2

Please sign in to comment.