Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simplified curie logic #792

Draft
wants to merge 3 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 28 additions & 88 deletions dataload/linker/src/main/java/LinkerPass2.java
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,16 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
Set<String> stringsInEntity = new HashSet<String>();
String entityIri = null;

EntityDefinitionSet defOfThisEntity = pass1Result.iriToDefinitions.get(entityIri);

String curie = null;
if(defOfThisEntity.definingDefinitions.size() > 0) {
// always use the defining ontology's curie, as the defining
// ontology knows the base URI and we might not
//
curie = defOfThisEntity.definingDefinitions.iterator().next().curie.getAsString();
}

while(jsonReader.peek() != JsonToken.END_OBJECT) {

String name = jsonReader.nextName();
Expand All @@ -153,17 +163,28 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
if(name.equals("iri")) {
entityIri = jsonReader.nextString();
jsonWriter.value(entityIri);
} else if (name.equalsIgnoreCase("curie")) {
processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else if (name.equalsIgnoreCase("shortForm")) {
processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else {
CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
continue;
}

if(name == "curie") {
if(curie != null) {
// use the defining ontology curie
jsonReader.skipValue();
jsonWriter.value(curie);
} else {
// fallthrough to using the curie from rdf2json
curie = jsonReader.nextString();
jsonWriter.value(curie);
}
continue;
}

CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
}

jsonWriter.name("shortForm");
jsonWriter.value(curie.replaceFirst(":", "_"));

EntityDefinitionSet defOfThisEntity = pass1Result.iriToDefinitions.get(entityIri);
if(defOfThisEntity != null) {

jsonWriter.name(IS_DEFINING_ONTOLOGY.getText());
Expand Down Expand Up @@ -441,85 +462,4 @@ private static class CurieMapResult {
public String url;
public String source;
}

/**
 * Reads one short-form object ({@code "type"} array plus {@code "value"} string) from
 * {@code jsonReader} and writes it to {@code jsonWriter} with the value replaced.
 *
 * <p>The replacement value is the string returned by
 * {@link #getProcessedCurieValue} for {@code entityIri}, with every {@code ':'} turned into
 * {@code '_'}. The {@code "type"} strings are copied through unchanged. Output order is always
 * {@code "type"} then {@code "value"}, regardless of input order. Any other field in the input
 * object is skipped and not written.
 *
 * @param jsonReader  reader positioned at the start of the short-form object
 * @param jsonWriter  writer that receives the rewritten object
 * @param pass1Result pass-1 result used to look up the replacement value
 * @param entityIri   IRI of the entity the object belongs to
 * @throws IOException           if reading or writing JSON fails
 * @throws IllegalStateException if the object lacks a {@code "type"} or {@code "value"} field
 */
private static void processShortFormObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String shortFormValue = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String shortFormFieldName = jsonReader.nextName();
        if (shortFormFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (shortFormFieldName.equals("value")) {
            // The incoming value must be consumed to advance the reader, but it is
            // intentionally discarded in favour of the looked-up value.
            jsonReader.nextString();
            shortFormValue = getProcessedCurieValue(pass1Result, entityIri).replace(":", "_");
        } else {
            // Consume unrecognised fields; otherwise the next nextName() call would fail
            // because the reader would still be positioned on this field's value.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Fail before writing anything so a malformed input never leaves a half-written object.
    if (typeArray == null || shortFormValue == null) {
        throw new IllegalStateException(
                "shortForm object of entity " + entityIri + " must contain both \"type\" and \"value\"");
    }

    // Write the modified short form object
    jsonWriter.beginObject();
    jsonWriter.name("type");
    jsonWriter.beginArray();
    for (JsonElement typeElement : typeArray) {
        jsonWriter.value(typeElement.getAsString());
    }
    jsonWriter.endArray();
    jsonWriter.name("value").value(shortFormValue);
    jsonWriter.endObject();
}

/**
 * Reads one curie object ({@code "type"} array plus {@code "value"} string) from
 * {@code jsonReader} and writes it to {@code jsonWriter} with the value replaced.
 *
 * <p>The replacement value is the string returned by {@link #getProcessedCurieValue} for
 * {@code entityIri}. The {@code "type"} strings are copied through unchanged. Output order is
 * always {@code "type"} then {@code "value"}, regardless of input order. Any other field in the
 * input object is skipped and not written.
 *
 * @param jsonReader  reader positioned at the start of the curie object
 * @param jsonWriter  writer that receives the rewritten object
 * @param pass1Result pass-1 result used to look up the replacement value
 * @param entityIri   IRI of the entity the object belongs to
 * @throws IOException           if reading or writing JSON fails
 * @throws IllegalStateException if the object lacks a {@code "type"} or {@code "value"} field
 */
private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String curieValue = null;

    jsonReader.beginObject();
    while (jsonReader.peek() != JsonToken.END_OBJECT) {
        String curieFieldName = jsonReader.nextName();
        if (curieFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.peek() != JsonToken.END_ARRAY) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (curieFieldName.equals("value")) {
            // The incoming value must be consumed to advance the reader, but it is
            // intentionally discarded in favour of the looked-up value.
            jsonReader.nextString();
            curieValue = getProcessedCurieValue(pass1Result, entityIri);
        } else {
            // Consume unrecognised fields; otherwise the next nextName() call would fail
            // because the reader would still be positioned on this field's value.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Fail before writing anything so a malformed input never leaves a half-written object.
    if (typeArray == null || curieValue == null) {
        throw new IllegalStateException(
                "curie object of entity " + entityIri + " must contain both \"type\" and \"value\"");
    }

    // Write the modified curie object
    jsonWriter.beginObject();
    jsonWriter.name("type");
    jsonWriter.beginArray();
    for (JsonElement typeElement : typeArray) {
        jsonWriter.value(typeElement.getAsString());
    }
    jsonWriter.endArray();
    jsonWriter.name("value").value(curieValue);
    jsonWriter.endObject();
}

/**
 * Looks up the curie value recorded in pass 1 for an entity.
 *
 * <p>Takes the first definition that iteration over the entity's {@code definitions} yields
 * and returns the {@code "value"} member of its curie object.
 *
 * @param pass1Result pass-1 result holding the IRI-to-definitions map
 * @param entityIri   IRI of the entity to look up; may be unknown to pass 1
 * @return the curie value, or the empty string when the IRI has no entry, the entry has no
 *         definitions, or the first definition's curie object has no {@code "value"} member
 */
private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) {
    var def = pass1Result.iriToDefinitions.get(entityIri);
    if (def == null) {
        // The lookup can miss (e.g. the IRI was not seen in pass 1 or has not been read
        // yet); treat that like "no curie available" instead of dereferencing null.
        return "";
    }

    var definitionIterator = def.definitions.iterator();
    if (definitionIterator.hasNext()) {
        JsonObject defCurieObject = definitionIterator.next().curie.getAsJsonObject();
        if (defCurieObject.has("value")) {
            return defCurieObject.get("value").getAsString();
        }
    }
    return "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -262,14 +262,14 @@ private String urlToFilename(String url) {
HierarchicalParentsAnnotator.annotateHierarchicalParents(this); // must run after RelatedAnnotator
AncestorsAnnotator.annotateAncestors(this);
HierarchyMetricsAnnotator.annotateHierarchyMetrics(this); // must run after HierarchicalParentsAnnotator
ShortFormAnnotator.annotateShortForms(this);
CurieAnnotator.annotateCuries(this);
DefinitionAnnotator.annotateDefinitions(this);
SynonymAnnotator.annotateSynonyms(this);
ReifiedPropertyAnnotator.annotateReifiedProperties(this);
OntologyMetadataAnnotator.annotateOntologyMetadata(this);
HierarchyFlagsAnnotator.annotateHierarchyFlags(this); // must run after DirectParentsAnnotator and HierarchicalParentsAnnotator
IsObsoleteAnnotator.annotateIsObsolete(this);
LabelAnnotator.annotateLabels(this); // must run after ShortFormAnnotator
LabelAnnotator.annotateLabels(this); // must run after CurieAnnotator
ConfigurablePropertyAnnotator.annotateConfigurableProperties(this);
PreferredRootsAnnotator.annotatePreferredRoots(this);
DisjointWithAnnotator.annotateDisjointWith(this);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package uk.ac.ebi.rdf2json.annotators;

import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.ebi.rdf2json.OntologyNode;
import uk.ac.ebi.rdf2json.OntologyGraph;
import uk.ac.ebi.rdf2json.annotators.helpers.OntologyBaseUris;
import uk.ac.ebi.rdf2json.properties.PropertyValueLiteral;

/**
 * Adds a {@code "curie"} literal property to the class, property, individual and datatype
 * nodes of an ontology graph.
 */
public class CurieAnnotator {
    private static final Logger logger = LoggerFactory.getLogger(CurieAnnotator.class);

    /**
     * Computes a curie for every class, property, individual and datatype node that has a URI
     * and stores it on the node as the {@code "curie"} property. Nodes without a URI (blank
     * nodes) are left untouched.
     *
     * @param graph the ontology graph whose nodes are annotated in place
     */
    public static void annotateCuries(OntologyGraph graph) {

        long startNanos = System.nanoTime();

        Set<String> ontologyBaseUris = OntologyBaseUris.getOntologyBaseUris(graph);
        // The prefix does not depend on the node, so resolve it once up front.
        String preferredPrefix = resolvePreferredPrefix(graph);

        for (OntologyNode node : graph.nodes.values()) {
            boolean isNamedEntityType =
                    node.types.contains(OntologyNode.NodeType.CLASS)
                            || node.types.contains(OntologyNode.NodeType.PROPERTY)
                            || node.types.contains(OntologyNode.NodeType.INDIVIDUAL)
                            || node.types.contains(OntologyNode.NodeType.DATATYPE);
            if (!isNamedEntityType) {
                continue;
            }

            // skip bnodes
            if (node.uri == null) {
                continue;
            }

            String curie = extractCurie(ontologyBaseUris, preferredPrefix, node.uri);
            node.properties.addProperty("curie", PropertyValueLiteral.fromString(curie));
        }

        long endNanos = System.nanoTime();
        // Integer division: the logged figure is elapsed whole seconds.
        logger.info("annotate curies: {}", ((endNanos - startNanos) / 1000 / 1000 / 1000));
    }

    /**
     * Returns the configured {@code preferredPrefix}, falling back to the upper-cased ontology
     * {@code id} when it is missing or empty.
     *
     * @return the prefix, or {@code null} when neither config entry is available
     */
    private static String resolvePreferredPrefix(OntologyGraph graph) {
        String configuredPrefix = (String) graph.config.get("preferredPrefix");
        if (configuredPrefix != null && !configuredPrefix.isEmpty()) {
            return configuredPrefix;
        }

        Object ontologyId = graph.config.get("id");
        if (ontologyId == null) {
            return null;
        }
        // Locale.ROOT keeps the identifier stable regardless of the JVM default locale
        // (e.g. a Turkish locale would otherwise map 'i' to a dotted capital I).
        return ontologyId.toString().toUpperCase(Locale.ROOT);
    }

    /**
     * Derives a curie from a URI.
     *
     * <ul>
     *   <li>{@code urn:} URIs yield everything after the {@code urn:} scheme.</li>
     *   <li>URIs under one of the ontology base URIs yield
     *       {@code preferredPrefix + ":" + remainder}. When several base URIs match, the
     *       longest one is used so the result does not depend on set iteration order.</li>
     *   <li>Anything else yields the text after the last {@code '/'} or {@code '#'}, or the
     *       whole URI when it contains neither.</li>
     * </ul>
     *
     * @param ontologyBaseUris base URIs owned by the ontology
     * @param preferredPrefix  prefix for URIs under a base URI; {@code null} disables that rule
     * @param uri              the non-null URI to shorten
     * @return the derived curie
     */
    private static String extractCurie(Set<String> ontologyBaseUris, String preferredPrefix, String uri) {

        if (uri.startsWith("urn:")) {
            return uri.substring(4);
        }

        if (preferredPrefix != null) {
            String longestMatchingBase = null;
            for (String baseUri : ontologyBaseUris) {
                boolean isLongerMatch = uri.startsWith(baseUri)
                        && (longestMatchingBase == null || baseUri.length() > longestMatchingBase.length());
                if (isLongerMatch) {
                    longestMatchingBase = baseUri;
                }
            }
            if (longestMatchingBase != null) {
                return preferredPrefix + ":" + uri.substring(longestMatchingBase.length());
            }
        }

        // lastIndexOf returns -1 when the separator is absent, so a URI with neither '/'
        // nor '#' yields substring(0), i.e. the URI itself.
        int lastSeparator = Math.max(uri.lastIndexOf('/'), uri.lastIndexOf('#'));
        return uri.substring(lastSeparator + 1);
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public static Set<String> getLabelProperties(OntologyGraph graph) {
}

// Collates the properties returned by getLabelProperties(graph) into the "label"
// property via collateProperties, supplying a one-element list as the fallback properties.
// NOTE(review): the two calls below appear to be the removed ("shortForm") and added
// ("curie") sides of a one-line diff rather than two consecutive statements; confirm
// against the actual file before treating this as executable code.
public static void annotateLabels(OntologyGraph graph) {
collateProperties(graph, "label", getLabelProperties(graph), List.of("shortForm"));
collateProperties(graph, "label", getLabelProperties(graph), List.of("curie"));
}

private static void collateProperties(OntologyGraph graph, String destProp, Collection<String> sourceProps, Collection<String> fallbackProps) {
Expand Down

This file was deleted.

Loading