-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
EVA3454 - Retrieve and Update Md5checksum for assemblies (#121)
* created an end-point as well as a scheduler for retrieving and saving md5 checksum
- Loading branch information
Showing
16 changed files
with
356 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
package uk.ac.ebi.eva.contigalias.conf; | ||
|
||
import org.springframework.context.annotation.Bean; | ||
import org.springframework.context.annotation.Configuration; | ||
import org.springframework.web.client.RestTemplate; | ||
|
||
@Configuration | ||
public class ContigAliasConfiguration { | ||
|
||
@Bean | ||
public RestTemplate getRestTemplate() { | ||
return new RestTemplate(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
123 changes: 123 additions & 0 deletions
123
src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
package uk.ac.ebi.eva.contigalias.scheduler; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.data.domain.PageRequest; | ||
import org.springframework.data.domain.Pageable; | ||
import org.springframework.data.domain.Slice; | ||
import org.springframework.scheduling.annotation.Scheduled; | ||
import org.springframework.stereotype.Component; | ||
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity; | ||
import uk.ac.ebi.eva.contigalias.service.ChromosomeService; | ||
|
||
import java.util.HashMap; | ||
import java.util.HashSet; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.ConcurrentHashMap; | ||
import java.util.concurrent.ExecutionException; | ||
import java.util.stream.Collectors; | ||
|
||
@Component | ||
public class ChecksumSetter { | ||
private final Logger logger = LoggerFactory.getLogger(ChecksumSetter.class); | ||
private final Map<String, CompletableFuture<Void>> runningMD5ChecksumUpdateTasks = new ConcurrentHashMap<>(); | ||
private Set<String> scheduledToRunMD5ChecksumUpdateTasks = new HashSet<>(); | ||
private int DEFAULT_PAGE_SIZE = 10000; | ||
private ChromosomeService chromosomeService; | ||
private Md5ChecksumRetriever md5ChecksumRetriever; | ||
|
||
@Autowired | ||
public ChecksumSetter(ChromosomeService chromosomeService, Md5ChecksumRetriever md5ChecksumRetriever) { | ||
this.chromosomeService = chromosomeService; | ||
this.md5ChecksumRetriever = md5ChecksumRetriever; | ||
} | ||
|
||
@Scheduled(cron = "0 0 0 ? * TUE") | ||
public void updateMd5CheckSumForAllAssemblies() { | ||
scheduledToRunMD5ChecksumUpdateTasks = new HashSet<>(); | ||
List<String> assemblyList = chromosomeService.getAssembliesWhereChromosomeMd5ChecksumIsNull(); | ||
logger.info("List of assemblies to be updated for MD5 Checksum: " + assemblyList); | ||
scheduledToRunMD5ChecksumUpdateTasks.addAll(assemblyList.stream().collect(Collectors.toSet())); | ||
|
||
for (String assembly : assemblyList) { | ||
CompletableFuture<Void> future = updateMd5CheckSumForAssemblyAsync(assembly); | ||
try { | ||
future.get(); | ||
} catch (InterruptedException | ExecutionException e) { | ||
logger.error("Encountered an error when running MD5Checksum update for assembly: " + assembly); | ||
} finally { | ||
scheduledToRunMD5ChecksumUpdateTasks.remove(assembly); | ||
} | ||
} | ||
} | ||
|
||
public CompletableFuture<Void> updateMd5CheckSumForAssemblyAsync(String assembly) { | ||
logger.info("Submitted job for updating MD5 Checksum for assembly (asynchronously)"); | ||
// Check if the async task for this assembly is already running | ||
CompletableFuture<Void> existingTask = runningMD5ChecksumUpdateTasks.get(assembly); | ||
if (existingTask != null && !existingTask.isDone()) { | ||
logger.info("Async task is still running for assembly: " + assembly); | ||
return existingTask; | ||
} | ||
// Start the async task (removing existing run if present) | ||
runningMD5ChecksumUpdateTasks.remove(assembly); | ||
CompletableFuture<Void> future = CompletableFuture.runAsync(() -> { | ||
updateMD5ChecksumForAllChromosomesInAssembly(assembly); | ||
}); | ||
// Store the future in the map for the given assembly | ||
runningMD5ChecksumUpdateTasks.put(assembly, future); | ||
|
||
// check the status of task upon completion and remove from running tasks | ||
future.whenComplete((result, exception) -> { | ||
if (exception != null) { | ||
logger.error("Async task (MD5Checksum setter) failed for assembly: " + assembly, exception); | ||
} else { | ||
logger.info("Async task (MD5Checksum setter) completed successfully for assembly: " + assembly); | ||
} | ||
runningMD5ChecksumUpdateTasks.remove(assembly); | ||
}); | ||
|
||
return future; | ||
} | ||
|
||
public void updateMD5ChecksumForAllChromosomesInAssembly(String assembly) { | ||
logger.info("Trying to update md5checksum for assembly: " + assembly); | ||
Slice<ChromosomeEntity> chrSlice; | ||
Pageable pageable = PageRequest.of(0, DEFAULT_PAGE_SIZE); | ||
long chromosomeUpdated = 0; | ||
do { | ||
chrSlice = chromosomeService.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(assembly, pageable); | ||
List<ChromosomeEntity> chromosomeEntityList = chrSlice.getContent(); | ||
updateMd5ChecksumForChromosome(chromosomeEntityList); | ||
|
||
chromosomeUpdated += chromosomeEntityList.size(); | ||
logger.info("Chromosomes Updated till now: " + chromosomeUpdated); | ||
} while (chrSlice.hasNext()); | ||
|
||
logger.info("Updating md5checksum for assembly " + assembly + " completed"); | ||
} | ||
|
||
public void updateMd5ChecksumForChromosome(List<ChromosomeEntity> chromosomesList) { | ||
chromosomesList.parallelStream().forEach(chromosome -> { | ||
try { | ||
String md5Checksum = md5ChecksumRetriever.retrieveMd5Checksum(chromosome.getInsdcAccession()); | ||
chromosome.setMd5checksum(md5Checksum); | ||
} catch (Exception e) { | ||
logger.info("Could not retrieve md5Checksum for insdc accession: " + chromosome.getInsdcAccession()); | ||
} | ||
}); | ||
|
||
chromosomeService.updateMd5ChecksumForAll(chromosomesList); | ||
} | ||
|
||
public Map<String, Set<String>> getMD5ChecksumUpdateTaskStatus() { | ||
Map<String, Set<String>> taskStatus = new HashMap<>(); | ||
taskStatus.put("running", runningMD5ChecksumUpdateTasks.keySet()); | ||
taskStatus.put("scheduled", scheduledToRunMD5ChecksumUpdateTasks); | ||
return taskStatus; | ||
} | ||
} |
32 changes: 32 additions & 0 deletions
32
src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package uk.ac.ebi.eva.contigalias.scheduler; | ||
|
||
import com.fasterxml.jackson.databind.JsonNode; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.retry.annotation.Backoff; | ||
import org.springframework.retry.annotation.Retryable; | ||
import org.springframework.stereotype.Component; | ||
import org.springframework.web.client.RestTemplate; | ||
|
||
@Component | ||
public class Md5ChecksumRetriever { | ||
private final Logger logger = LoggerFactory.getLogger(Md5ChecksumRetriever.class); | ||
private String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER"; | ||
private String INSDC_CHECKSUM_URL = "https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata"; | ||
|
||
private RestTemplate restTemplate; | ||
|
||
@Autowired | ||
public Md5ChecksumRetriever(RestTemplate restTemplate) { | ||
this.restTemplate = restTemplate; | ||
} | ||
|
||
@Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2)) | ||
public String retrieveMd5Checksum(String insdcAccession) { | ||
String apiURL = INSDC_CHECKSUM_URL.replace(INSDC_ACCESSION_PLACE_HOLDER, insdcAccession); | ||
JsonNode jsonResponse = restTemplate.getForObject(apiURL, JsonNode.class); | ||
String md5Checksum = jsonResponse.get("metadata").get("md5").asText(); | ||
return md5Checksum; | ||
} | ||
} |
Oops, something went wrong.