Skip to content

Commit

Permalink
Merge branch 'EMC-19-gemet-themes' into 'develop'
Browse files Browse the repository at this point in the history
Add GEMET vocabulary

Closes EMC-19

See merge request eip/catalogue!541
  • Loading branch information
rodscott committed Oct 2, 2023
2 parents 921a8a5 + 6c9ba8b commit 836cf63
Show file tree
Hide file tree
Showing 6 changed files with 587 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.springframework.web.client.RestTemplate;
import uk.ac.ceh.gateway.catalogue.vocabularies.KeywordVocabulary;
import uk.ac.ceh.gateway.catalogue.vocabularies.SparqlKeywordVocabulary;
import uk.ac.ceh.gateway.catalogue.vocabularies.HttpKeywordVocabulary;

import java.util.List;

Expand Down Expand Up @@ -162,6 +163,37 @@ public KeywordVocabulary elterCLVocabulary(
);
}

/**
 * Keyword vocabulary for GEMET, the GEneral Multilingual Environmental Thesaurus.
 *
 * <p>This vocabulary was implemented using the documentation located at
 * https://www.eionet.europa.eu/gemet/en/webservices/
 *
 * <p>Its purpose is to harvest the GEMET Themes and Concepts only,
 * so NOT groups and supergroups. See EMC-6 in Jira for details.
 *
 * <p>Only registered when the {@code server:eidc} profile is active, and only
 * offered to the {@code eidc} catalogue.
 *
 * @param solrClient      Solr client handed to the vocabulary
 * @param gemetConceptUrl value of the {@code gemet.concepturl} property
 * @param gemetThemeUrl   value of the {@code gemet.themeurl} property
 * @return vocabulary reading both GEMET endpoints, concepts first then themes
 */
@Profile("server:eidc")
@Bean
public KeywordVocabulary gemetVocabulary(
    SolrClient solrClient,
    @Value("${gemet.concepturl}") String gemetConceptUrl,
    @Value("${gemet.themeurl}") String gemetThemeUrl
) {
    return new HttpKeywordVocabulary(
        "gemet",
        "GEMET",
        List.of(gemetConceptUrl, gemetThemeUrl),
        // empty results path: each entry is looked up from the document root
        "",
        "/uri",
        "/preferredLabel/string",
        solrClient,
        List.of("eidc")
    );
}

@Profile("server:inms")
@Bean
public KeywordVocabulary inmsVocabulary(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
package uk.ac.ceh.gateway.catalogue.vocabularies;

import com.fasterxml.jackson.core.JsonPointer;
import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import lombok.SneakyThrows;
import lombok.ToString;
import lombok.extern.slf4j.Slf4j;
import lombok.val;

import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrServerException;

import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.web.client.HttpStatusCodeException;

import static java.lang.String.format;

import static uk.ac.ceh.gateway.catalogue.TimeConstants.ONE_MINUTE;
import static uk.ac.ceh.gateway.catalogue.TimeConstants.SEVEN_DAYS;

@Slf4j
@ToString(exclude = "solrClient")
public class HttpKeywordVocabulary implements KeywordVocabulary {
private final String vocabularyId;
private final String vocabularyName;
private final List<String> httpEndpoints;
private final List<String> catalogueIds;
private final SolrClient solrClient;
private final ObjectMapper objectMapper;
private final JsonPointer resultsArrayPointer;
private final JsonPointer uriPointer;
private final JsonPointer labelPointer;

private static final String COLLECTION = "keywords";

// single endpoint constructor
public HttpKeywordVocabulary(
String vocabularyId,
String vocabularyName,
String httpEndpoint,
String resultsPath,
String uriPath,
String labelPath,
SolrClient solrClient,
List<String> catalogueIds
) {
this.vocabularyId = vocabularyId;
this.vocabularyName = vocabularyName;
this.httpEndpoints = List.of(httpEndpoint);
this.catalogueIds = catalogueIds;
this.solrClient = solrClient;
this.objectMapper = new ObjectMapper();
resultsArrayPointer = JsonPointer.compile(resultsPath);
uriPointer = JsonPointer.compile(uriPath);
labelPointer = JsonPointer.compile(labelPath);
log.info("Creating {}", this);
}

// multiple endpoint constructor
public HttpKeywordVocabulary(
String vocabularyId,
String vocabularyName,
List<String> httpEndpoints,
String resultsPath,
String uriPath,
String labelPath,
SolrClient solrClient,
List<String> catalogueIds
) {
this.vocabularyId = vocabularyId;
this.vocabularyName = vocabularyName;
this.httpEndpoints = httpEndpoints;
this.catalogueIds = catalogueIds;
this.solrClient = solrClient;
this.objectMapper = new ObjectMapper();
resultsArrayPointer = JsonPointer.compile(resultsPath);
uriPointer = JsonPointer.compile(uriPath);
labelPointer = JsonPointer.compile(labelPath);
log.info("Creating {}", this);
}

@Override
@SneakyThrows
@Scheduled(initialDelay = ONE_MINUTE, fixedDelay = SEVEN_DAYS)
public void retrieve() {
log.info("Retrieving vocabulary ({}) {}", vocabularyId, vocabularyName);

// not ideal always deleting because of the possibilty of errors
// but can add logic around this later
solrClient.deleteByQuery(COLLECTION, "vocabId:" + vocabularyId);
for (String endpoint : httpEndpoints){
try {
val vocabularyNode = Optional.ofNullable(objectMapper.readTree(new URL(endpoint)))
.orElseThrow(() -> new KeywordVocabularyException("Cannot get response body"))
.at(resultsArrayPointer);
log.debug(vocabularyNode.toString());

if (vocabularyNode.isArray()) {
log.info("Retrieved {} terms", vocabularyNode.size());
StreamSupport.stream(vocabularyNode.spliterator(), false)
.map(node -> {
val url = node.at(uriPointer).asText();
val label = node.at(labelPointer).asText();
return new Keyword(label, vocabularyId, url);
})
.forEach(keyword -> {
try {
solrClient.addBean(COLLECTION, keyword);
} catch (IOException | SolrServerException ex) {
throw new KeywordVocabularyException("Failed to index " + keyword + "for " + vocabularyId, ex);
}
});
}
} catch (HttpStatusCodeException ex) {
log.error(format("Cannot retrieve %s from vocab server, error: %s %s", vocabularyId, ex.getRawStatusCode(), ex.getResponseBodyAsString()));
throw new KeywordVocabularyException(
format("Cannot retrieve %s from vocab server, error: %s %s", vocabularyId, ex.getRawStatusCode(), ex.getResponseBodyAsString()),
ex
);
} catch (IOException ex) {
throw new KeywordVocabularyException(
format("Failed to communicate with Solr for %s", vocabularyId),
ex
);
}
}
solrClient.commit(COLLECTION);
}

@Override
public String getName() {
return vocabularyName;
}

@Override
public String getId() {
return vocabularyId;
}

@Override
public String getGraph() {
return "N/A";
}

@Override
public boolean usedInCatalogue(String catalogueId) {
return catalogueIds.contains(catalogueId);
}
}
2 changes: 2 additions & 0 deletions java/src/main/resources/application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ doi.publisher=NERC EDS Environmental Information Data Centre
doi.legacyPublisher=NERC Environmental Information Data Centre
doi.templateLocation=/datacite/datacite.ftlx
doi.username=BL.NERC
gemet.themeurl=https://www.eionet.europa.eu/gemet/getTopmostConcepts?thesaurus_uri=http://www.eionet.europa.eu/gemet/theme/&language=en
gemet.concepturl=https://www.eionet.europa.eu/gemet/getConceptsMatchingRegexByThesaurus?regex=^&thesaurus_uri=http://www.eionet.europa.eu/gemet/concept/&language=en
hubbub.location=/var/ceh-catalogue/dropbox
hubbub.url=https://hubbub.ceh.ac.uk/v7
hubbub.username=eidc_hubbub
Expand Down
Loading

0 comments on commit 836cf63

Please sign in to comment.