diff --git a/pom.xml b/pom.xml
index f63b0907..48572f74 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,7 @@
org.springframework.retry
spring-retry
- 1.2.5.RELEASE
+ 1.3.1
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java
index 0bc6cdba..3f286d2e 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java
@@ -22,7 +22,9 @@
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.hateoas.config.EnableHypermediaSupport;
import org.springframework.retry.annotation.EnableRetry;
+import org.springframework.scheduling.annotation.EnableScheduling;
+@EnableScheduling
@SpringBootApplication
@EnableRetry
@EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL)
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java b/src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java
new file mode 100644
index 00000000..87df2dd5
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java
@@ -0,0 +1,14 @@
+package uk.ac.ebi.eva.contigalias.conf;
+
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.web.client.RestTemplate;
+
+/**
+ * Spring configuration that contributes the application's shared {@link RestTemplate} bean.
+ */
+@Configuration
+public class ContigAliasConfiguration {
+
+    /**
+     * Creates the {@link RestTemplate} bean with the library's default settings.
+     *
+     * @return a newly constructed {@link RestTemplate}
+     */
+    @Bean
+    public RestTemplate getRestTemplate() {
+        RestTemplate restTemplate = new RestTemplate();
+        return restTemplate;
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java
index 1312301c..37e2d29a 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java
@@ -21,16 +21,20 @@
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.DeleteMapping;
+import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PathVariable;
import org.springframework.web.bind.annotation.PutMapping;
import org.springframework.web.bind.annotation.RequestBody;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
+import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException;
import java.io.IOException;
import java.util.List;
import java.util.Map;
+import java.util.Set;
+import java.util.stream.Collectors;
@RequestMapping("/v1/admin")
@RestController
@@ -87,6 +91,36 @@ public ResponseEntity> fetchAndInsertAssemblyByAccession(
return new ResponseEntity<>("Accession Processing Result : " + accessionResult, HttpStatus.MULTI_STATUS);
}
+    /**
+     * Submits a job that retrieves and stores the MD5 checksum of every chromosome of an assembly,
+     * provided the handler can find that assembly.
+     *
+     * @param asmAccession INSDC or RefSeq accession of the assembly
+     * @return 200 OK whose body says either that the job was submitted or that the assembly is unknown
+     */
+    @ApiOperation(value = "Given an assembly accession, retrieve MD5 checksum for all chromosomes belonging to assembly and update")
+    @PutMapping(value = "assemblies/{accession}/md5checksum")
+    public ResponseEntity retrieveAndInsertMd5ChecksumForAssembly(@PathVariable(name = "accession")
+                                                                  @ApiParam(value = "INSDC or RefSeq assembly accession. Eg: " +
+                                                                          "GCA_000001405.10") String asmAccession) {
+        String assemblyNotFoundMessage = "Could not find assembly " + asmAccession +
+                ". Please insert the assembly first (md5checksum will be updated as part of the insertion process)";
+        try {
+            // The lookup result must be inspected: an empty Optional means there is nothing to update,
+            // so no job may be submitted for it.
+            if (!handler.getAssemblyByAccession(asmAccession).isPresent()) {
+                return ResponseEntity.ok(assemblyNotFoundMessage);
+            }
+            handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession);
+            return ResponseEntity.ok("A task has been submitted for updating md5checksum for all chromosomes " +
+                    "in assembly " + asmAccession + ". Depending upon the number of chromosomes present in assembly, " +
+                    "this might take some time to complete");
+        } catch (AssemblyNotFoundException e) {
+            // Kept for lookups that signal a missing assembly by throwing instead of returning empty.
+            return ResponseEntity.ok(assemblyNotFoundMessage);
+        }
+    }
+
+    /**
+     * Reports which assemblies currently have an MD5 checksum update running and which are still
+     * waiting for one, as reported by the handler under the keys {@code "running"} and {@code "scheduled"}.
+     *
+     * @return 200 OK with a two-line plain-text summary (comma-separated accessions or a "none" message)
+     */
+    @ApiOperation(value = "Retrieve list of assemblies for which MD5 Checksum updates are running/going-to-run ")
+    @GetMapping(value = "assemblies/md5checksum/status")
+    public ResponseEntity getMD5ChecksumUpdateTaskStatus() {
+        Map<String, Set<String>> md5ChecksumUpdateTasks = handler.getMD5ChecksumUpdateTaskStatus();
+        String runningTaskRes = describeMd5ChecksumTasks(md5ChecksumUpdateTasks.get("running"),
+                "No running MD5 checksum update tasks");
+        String scheduledTaskRes = describeMd5ChecksumTasks(md5ChecksumUpdateTasks.get("scheduled"),
+                "No scheduled MD5 checksum update tasks");
+        return ResponseEntity.ok("running: " + runningTaskRes + "\nscheduled: " + scheduledTaskRes);
+    }
+
+    /** Joins the accessions with commas, or returns {@code emptyMessage} when the set is null or empty. */
+    private static String describeMd5ChecksumTasks(Set<String> tasks, String emptyMessage) {
+        if (tasks == null || tasks.isEmpty()) {
+            return emptyMessage;
+        }
+        return String.join(",", tasks);
+    }
+
+
// This endpoint can be enabled in the future when checksums for assemblies are added to the project.
// @ApiOperation(value = "Add MD5 and TRUNC512 checksums to an assembly by accession.",
// notes = "Given an INSDC or RefSeq accession along with a MD5 or a TRUNC512 checksum, this endpoint will
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java
index 9cc50eb9..2007cd17 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java
@@ -27,6 +27,8 @@
import java.io.IOException;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
@Service
public class AdminHandler {
@@ -46,6 +48,10 @@ public AdminHandler(AssemblyService assemblyService,
this.assemblyAssembler = assemblyAssembler;
}
+ /**
+ * Looks up an assembly by delegating to {@code assemblyService.getAssemblyByAccession}.
+ *
+ * @param accession accession handed unchanged to the service
+ * @return the service's result for that accession
+ */
+ public Optional getAssemblyByAccession(String accession) {
+ return assemblyService.getAssemblyByAccession(accession);
+ }
+
public void fetchAndInsertAssemblyByAccession(String accession) throws IOException {
assemblyService.fetchAndInsertAssembly(accession);
}
@@ -54,6 +60,14 @@ public Map> fetchAndInsertAssemblyByAccession(List
return assemblyService.fetchAndInsertAssembly(accessions);
}
+ /**
+ * Asks the assembly service to retrieve and insert MD5 checksums for the given assembly.
+ *
+ * @param accession assembly accession handed unchanged to the service
+ */
+ public void retrieveAndInsertMd5ChecksumForAssembly(String accession) {
+ assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accession);
+ }
+
+ /**
+ * Returns the MD5 checksum update task status exactly as reported by the assembly service.
+ *
+ * @return the status map produced by {@code assemblyService.getMD5ChecksumUpdateTaskStatus()}
+ */
+ public Map> getMD5ChecksumUpdateTaskStatus() {
+ return assemblyService.getMD5ChecksumUpdateTaskStatus();
+ }
+
public void deleteAssemblyByAccession(String accession) {
assemblyService.deleteAssemblyByAccession(accession);
}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java b/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java
index d739dc7f..37a5c791 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java
@@ -123,20 +123,17 @@ public Optional downloadAssemblyReport(ENABrowser enaBrowser, String acces
-     * @param optional {@link AssemblyEntity} to add ENA sequence names to
+     * @param targetAssembly {@link AssemblyEntity} to add ENA sequence names to
* @throws IOException Passes IOException thrown by {@link #getAssemblyByAccession(String)}
*/
- public void addENASequenceNamesToAssembly(Optional optional) throws IOException {
- if (optional.isPresent()) {
- AssemblyEntity targetAssembly = optional.get();
- if (!hasAllEnaSequenceNames(targetAssembly)) {
- String insdcAccession = targetAssembly.getInsdcAccession();
- Optional enaAssembly = getAssemblyByAccession(insdcAccession);
-
- if (enaAssembly.isPresent()) {
- AssemblyEntity sourceAssembly = enaAssembly.get();
- addENASequenceNames(Objects.nonNull(sourceAssembly.getChromosomes()) ?
- sourceAssembly.getChromosomes() : Collections.emptyList(),
- Objects.nonNull(targetAssembly.getChromosomes()) ?
- targetAssembly.getChromosomes() : Collections.emptyList());
- }
+ public void addENASequenceNamesToAssembly(AssemblyEntity targetAssembly) throws IOException {
+ // Skip the lookup entirely when hasAllEnaSequenceNames already reports true for the target
+ if (!hasAllEnaSequenceNames(targetAssembly)) {
+ String insdcAccession = targetAssembly.getInsdcAccession();
+ // Fetch the same assembly through this data source, keyed by the target's INSDC accession
+ Optional enaAssembly = getAssemblyByAccession(insdcAccession);
+
+ // When the lookup comes back empty the target is left untouched
+ if (enaAssembly.isPresent()) {
+ AssemblyEntity sourceAssembly = enaAssembly.get();
+ // A null chromosome list on either side is replaced by an empty list before the names are added
+ addENASequenceNames(Objects.nonNull(sourceAssembly.getChromosomes()) ?
+ sourceAssembly.getChromosomes() : Collections.emptyList(),
+ Objects.nonNull(targetAssembly.getChromosomes()) ?
+ targetAssembly.getChromosomes() : Collections.emptyList());
}
}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java
index 0b6f5bd7..2a1ae338 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java
@@ -19,11 +19,16 @@
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.Modifying;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
+import java.util.List;
+
@Repository
public interface ChromosomeRepository extends JpaRepository {
@@ -35,6 +40,16 @@ public interface ChromosomeRepository extends JpaRepository findChromosomeEntitiesByAssembly_InsdcAccession(String asmInsdcAccession, Pageable request);
+ // Pages through the chromosomes of the assembly with the given INSDC accession whose md5checksum
+ // is either NULL or the empty string.
+ @Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '')")
+ Page findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(@Param("asmInsdcAccession") String asmInsdcAccession, Pageable pageable);
+
+ // Lists the distinct assembly INSDC accessions that have at least one chromosome whose md5checksum
+ // is NULL or the empty string.
+ @Query("SELECT distinct c.assembly.insdcAccession FROM ChromosomeEntity c WHERE c.md5checksum IS NULL OR c.md5checksum = ''")
+ List findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
+
+ // Bulk update: sets md5checksum on the chromosome(s) matching both the assembly INSDC accession and
+ // the chromosome INSDC accession. Declared @Modifying because the query is an UPDATE, not a SELECT.
+ @Modifying
+ @Query("UPDATE ChromosomeEntity c SET c.md5checksum = :md5Checksum WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession")
+ void updateMd5ChecksumByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("md5Checksum") String md5Checksum);
+
Page findChromosomeEntitiesByAssembly_Refseq(String asmRefseq, Pageable request);
Page findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(String genbankName, long asmTaxid, Pageable request);
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
new file mode 100644
index 00000000..c1063c50
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
@@ -0,0 +1,123 @@
+package uk.ac.ebi.eva.contigalias.scheduler;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.data.domain.Pageable;
+import org.springframework.data.domain.Slice;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Component;
+import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
+import uk.ac.ebi.eva.contigalias.service.ChromosomeService;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutionException;
+import java.util.stream.Collectors;
+
+/**
+ * Retrieves MD5 checksums for chromosomes that do not have one yet and stores them.
+ *
+ * <p>Updates run per assembly, either from the weekly scheduled sweep or on demand through
+ * {@link #updateMd5CheckSumForAssemblyAsync(String)}. The bookkeeping collections are read by other
+ * threads through {@link #getMD5ChecksumUpdateTaskStatus()}, so both are concurrent collections.
+ */
+@Component
+public class ChecksumSetter {
+    private final Logger logger = LoggerFactory.getLogger(ChecksumSetter.class);
+    private final Map<String, CompletableFuture<Void>> runningMD5ChecksumUpdateTasks = new ConcurrentHashMap<>();
+    // Concurrent set: mutated by the scheduler thread while status requests read it.
+    private final Set<String> scheduledToRunMD5ChecksumUpdateTasks = ConcurrentHashMap.newKeySet();
+    private int DEFAULT_PAGE_SIZE = 10000;
+    private final ChromosomeService chromosomeService;
+    private final Md5ChecksumRetriever md5ChecksumRetriever;
+
+    @Autowired
+    public ChecksumSetter(ChromosomeService chromosomeService, Md5ChecksumRetriever md5ChecksumRetriever) {
+        this.chromosomeService = chromosomeService;
+        this.md5ChecksumRetriever = md5ChecksumRetriever;
+    }
+
+    /**
+     * Weekly sweep (cron {@code 0 0 0 ? * TUE}): updates, one assembly at a time, every assembly that
+     * still has chromosomes without an MD5 checksum.
+     */
+    @Scheduled(cron = "0 0 0 ? * TUE")
+    public void updateMd5CheckSumForAllAssemblies() {
+        // Null accessions cannot be used as keys of the concurrent bookkeeping collections.
+        List<String> assemblyList = chromosomeService.getAssembliesWhereChromosomeMd5ChecksumIsNull().stream()
+                .filter(accession -> accession != null)
+                .collect(Collectors.toList());
+        logger.info("List of assemblies to be updated for MD5 Checksum: " + assemblyList);
+        scheduledToRunMD5ChecksumUpdateTasks.clear();
+        scheduledToRunMD5ChecksumUpdateTasks.addAll(assemblyList);
+
+        for (String assembly : assemblyList) {
+            try {
+                // Wait for each assembly before starting the next one.
+                updateMd5CheckSumForAssemblyAsync(assembly).get();
+            } catch (InterruptedException e) {
+                // Restore the flag and stop: nothing else will be started by this sweep.
+                Thread.currentThread().interrupt();
+                logger.error("Interrupted while running MD5Checksum update for assembly: " + assembly, e);
+                scheduledToRunMD5ChecksumUpdateTasks.clear();
+                return;
+            } catch (ExecutionException e) {
+                logger.error("Encountered an error when running MD5Checksum update for assembly: " + assembly, e);
+            } finally {
+                scheduledToRunMD5ChecksumUpdateTasks.remove(assembly);
+            }
+        }
+    }
+
+    /**
+     * Starts an asynchronous MD5 checksum update for one assembly, unless one is still running.
+     *
+     * <p>Synchronized so that the "already running?" check and the registration of the new task
+     * cannot interleave between two callers asking for the same assembly.
+     *
+     * @param assembly INSDC accession of the assembly
+     * @return the future of the task that is running for this assembly (existing or newly started)
+     */
+    public synchronized CompletableFuture<Void> updateMd5CheckSumForAssemblyAsync(String assembly) {
+        logger.info("Submitted job for updating MD5 Checksum for assembly (asynchronously)");
+        // Check if the async task for this assembly is already running
+        CompletableFuture<Void> existingTask = runningMD5ChecksumUpdateTasks.get(assembly);
+        if (existingTask != null && !existingTask.isDone()) {
+            logger.info("Async task is still running for assembly: " + assembly);
+            return existingTask;
+        }
+        // Start the async task; put() below replaces any finished entry left for this assembly
+        CompletableFuture<Void> future = CompletableFuture.runAsync(
+                () -> updateMD5ChecksumForAllChromosomesInAssembly(assembly));
+        runningMD5ChecksumUpdateTasks.put(assembly, future);
+
+        // check the status of task upon completion and remove from running tasks
+        future.whenComplete((result, exception) -> {
+            if (exception != null) {
+                logger.error("Async task (MD5Checksum setter) failed for assembly: " + assembly, exception);
+            } else {
+                logger.info("Async task (MD5Checksum setter) completed successfully for assembly: " + assembly);
+            }
+            // Remove only this task's own entry so a newer task for the same assembly is not evicted.
+            runningMD5ChecksumUpdateTasks.remove(assembly, future);
+        });
+
+        return future;
+    }
+
+    /**
+     * Updates, batch by batch, every chromosome of the assembly that has no MD5 checksum yet.
+     *
+     * <p>The first page is requested on every iteration: chromosomes that received a checksum no
+     * longer match the query, so the next batch moves up into page 0. The loop stops as soon as a
+     * batch resolves nothing, otherwise the same unresolvable chromosomes would be fetched forever.
+     *
+     * @param assembly INSDC accession of the assembly
+     */
+    public void updateMD5ChecksumForAllChromosomesInAssembly(String assembly) {
+        logger.info("Trying to update md5checksum for assembly: " + assembly);
+        Slice<ChromosomeEntity> chrSlice;
+        Pageable pageable = PageRequest.of(0, DEFAULT_PAGE_SIZE);
+        long chromosomeUpdated = 0;
+        do {
+            chrSlice = chromosomeService.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(assembly, pageable);
+            int updatedInBatch = retrieveAndStoreMd5Checksums(chrSlice.getContent());
+
+            chromosomeUpdated += updatedInBatch;
+            logger.info("Chromosomes Updated till now: " + chromosomeUpdated);
+            if (updatedInBatch == 0) {
+                if (chrSlice.hasNext()) {
+                    logger.warn("No md5checksum could be retrieved for the current batch of assembly " + assembly
+                            + "; stopping with chromosomes still missing a checksum");
+                }
+                break;
+            }
+        } while (chrSlice.hasNext());
+
+        logger.info("Updating md5checksum for assembly " + assembly + " completed");
+    }
+
+    /**
+     * Retrieves the MD5 checksum of each given chromosome and stores the ones that were found.
+     *
+     * @param chromosomesList chromosomes to update; their {@code md5checksum} is set in place
+     */
+    public void updateMd5ChecksumForChromosome(List<ChromosomeEntity> chromosomesList) {
+        retrieveAndStoreMd5Checksums(chromosomesList);
+    }
+
+    /** Resolves checksums in parallel, persists only the resolved chromosomes and returns how many. */
+    private int retrieveAndStoreMd5Checksums(List<ChromosomeEntity> chromosomesList) {
+        List<ChromosomeEntity> resolved = chromosomesList.parallelStream()
+                .filter(this::tryToSetMd5Checksum)
+                .collect(Collectors.toList());
+        if (!resolved.isEmpty()) {
+            chromosomeService.updateMd5ChecksumForAll(resolved);
+        }
+        return resolved.size();
+    }
+
+    /** Sets the retrieved checksum on the chromosome; returns false when none could be obtained. */
+    private boolean tryToSetMd5Checksum(ChromosomeEntity chromosome) {
+        try {
+            String md5Checksum = md5ChecksumRetriever.retrieveMd5Checksum(chromosome.getInsdcAccession());
+            if (md5Checksum == null || md5Checksum.isEmpty()) {
+                logger.warn("Empty md5Checksum retrieved for insdc accession: " + chromosome.getInsdcAccession());
+                return false;
+            }
+            chromosome.setMd5checksum(md5Checksum);
+            return true;
+        } catch (Exception e) {
+            // Best effort per chromosome; the cause is logged without a stack trace to keep large batches readable.
+            logger.warn("Could not retrieve md5Checksum for insdc accession: " + chromosome.getInsdcAccession()
+                    + " (" + e + ")");
+            return false;
+        }
+    }
+
+    /**
+     * Reports the assemblies with a running update and those still queued by the scheduled sweep.
+     *
+     * @return a new map with keys {@code "running"} and {@code "scheduled"}, each holding a snapshot
+     *         copy of the corresponding accessions
+     */
+    public Map<String, Set<String>> getMD5ChecksumUpdateTaskStatus() {
+        Map<String, Set<String>> taskStatus = new HashMap<>();
+        taskStatus.put("running", new HashSet<>(runningMD5ChecksumUpdateTasks.keySet()));
+        taskStatus.put("scheduled", new HashSet<>(scheduledToRunMD5ChecksumUpdateTasks));
+        return taskStatus;
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
new file mode 100644
index 00000000..1077f38b
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
@@ -0,0 +1,32 @@
+package uk.ac.ebi.eva.contigalias.scheduler;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.retry.annotation.Backoff;
+import org.springframework.retry.annotation.Retryable;
+import org.springframework.stereotype.Component;
+import org.springframework.web.client.RestTemplate;
+
+/**
+ * Fetches the MD5 checksum of a sequence from the ENA CRAM reference metadata endpoint.
+ */
+@Component
+public class Md5ChecksumRetriever {
+    private final Logger logger = LoggerFactory.getLogger(Md5ChecksumRetriever.class);
+    private static final String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER";
+    private static final String INSDC_CHECKSUM_URL = "https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata";
+
+    private final RestTemplate restTemplate;
+
+    @Autowired
+    public Md5ChecksumRetriever(RestTemplate restTemplate) {
+        this.restTemplate = restTemplate;
+    }
+
+    /**
+     * Requests the metadata of the given sequence and returns the value of {@code metadata.md5}.
+     *
+     * <p>Any exception triggers a retry: up to 5 attempts, starting with a 2000 ms delay that doubles
+     * after each attempt.
+     *
+     * @param insdcAccession INSDC accession of the sequence, substituted into the endpoint URL
+     * @return the MD5 checksum reported by the endpoint
+     * @throws IllegalStateException if the response has no body or no non-null {@code metadata.md5}
+     */
+    @Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2))
+    public String retrieveMd5Checksum(String insdcAccession) {
+        String apiURL = INSDC_CHECKSUM_URL.replace(INSDC_ACCESSION_PLACE_HOLDER, insdcAccession);
+        JsonNode jsonResponse = restTemplate.getForObject(apiURL, JsonNode.class);
+        if (jsonResponse == null) {
+            throw new IllegalStateException("Empty response when retrieving md5 checksum for " + insdcAccession);
+        }
+        // path() never returns null, so a missing "metadata" or "md5" surfaces as a missing node
+        // instead of a NullPointerException; a JSON null must not be turned into the text "null".
+        JsonNode md5Node = jsonResponse.path("metadata").path("md5");
+        if (md5Node.isMissingNode() || md5Node.isNull()) {
+            throw new IllegalStateException("No md5 checksum in metadata response for " + insdcAccession);
+        }
+        return md5Node.asText();
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java b/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java
index 58a68d34..e97adc2f 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java
@@ -29,6 +29,7 @@
import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.contigalias.exception.DuplicateAssemblyException;
import uk.ac.ebi.eva.contigalias.repo.AssemblyRepository;
+import uk.ac.ebi.eva.contigalias.scheduler.ChecksumSetter;
import javax.transaction.Transactional;
import java.io.IOException;
@@ -38,8 +39,7 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
+import java.util.Set;
@Service
@@ -51,14 +51,17 @@ public class AssemblyService {
private final ENAAssemblyDataSource enaDataSource;
+ private final ChecksumSetter checksumSetter;
+
private final Logger logger = LoggerFactory.getLogger(AssemblyService.class);
@Autowired
- public AssemblyService(
- AssemblyRepository repository, NCBIAssemblyDataSource ncbiDataSource, ENAAssemblyDataSource enaDataSource) {
+ public AssemblyService(AssemblyRepository repository, NCBIAssemblyDataSource ncbiDataSource,
+ ENAAssemblyDataSource enaDataSource, ChecksumSetter checksumSetter) {
this.repository = repository;
this.ncbiDataSource = ncbiDataSource;
this.enaDataSource = enaDataSource;
+ this.checksumSetter = checksumSetter;
}
public Optional getAssemblyByInsdcAccession(String insdcAccession) {
@@ -99,15 +102,30 @@ public void fetchAndInsertAssembly(String accession) throws IOException {
if (!fetchAssembly.isPresent()) {
throw new AssemblyNotFoundException(accession);
}
- enaDataSource.addENASequenceNamesToAssembly(fetchAssembly);
- if (fetchAssembly.get().getChromosomes() != null && fetchAssembly.get().getChromosomes().size() > 0) {
- insertAssembly(fetchAssembly.get());
- logger.info("Successfully inserted assembly for accession " + accession);
+ if (fetchAssembly.isPresent()) {
+ AssemblyEntity assemblyEntity = fetchAssembly.get();
+ enaDataSource.addENASequenceNamesToAssembly(assemblyEntity);
+ if (assemblyEntity.getChromosomes() != null && assemblyEntity.getChromosomes().size() > 0) {
+ insertAssembly(assemblyEntity);
+ logger.info("Successfully inserted assembly for accession " + accession);
+ // submit job for retrieving and updating MD5 Checksum for assembly (asynchronously)
+ checksumSetter.updateMd5CheckSumForAssemblyAsync(accession);
+ } else {
+ logger.error("Skipping inserting assembly : No chromosome in assembly " + accession);
+ }
} else {
- logger.error("Skipping inserting assembly : No chromosome in assembly " + accession);
+ logger.error("Could not get assembly from NCBI");
}
}
+ /**
+ * Hands the assembly to {@code checksumSetter.updateMd5CheckSumForAssemblyAsync}; the returned
+ * future is not awaited or inspected here.
+ *
+ * @param assembly assembly accession passed unchanged to the checksum setter
+ */
+ public void retrieveAndInsertMd5ChecksumForAssembly(String assembly) {
+ checksumSetter.updateMd5CheckSumForAssemblyAsync(assembly);
+ }
+
+ /**
+ * Returns the MD5 checksum update task status exactly as reported by the checksum setter.
+ *
+ * @return the status map produced by {@code checksumSetter.getMD5ChecksumUpdateTaskStatus()}
+ */
+ public Map> getMD5ChecksumUpdateTaskStatus() {
+ return checksumSetter.getMD5ChecksumUpdateTaskStatus();
+ }
+
public Optional getAssemblyByAccession(String accession) {
Optional entity = repository.findAssemblyEntityByAccession(accession);
if (entity.isPresent()) {
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
index 93679963..898ae64b 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
@@ -25,6 +25,7 @@
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
import uk.ac.ebi.eva.contigalias.repo.ChromosomeRepository;
+import javax.transaction.Transactional;
import java.util.LinkedList;
import java.util.List;
@@ -54,6 +55,22 @@ public Page getChromosomesByAssemblyInsdcAccession(String asmI
return stripAssembliesFromChromosomes(chromosomes);
}
+ /**
+ * Returns the result of the repository query
+ * {@code findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty()} unchanged.
+ *
+ * @return the list produced by the repository
+ */
+ public List getAssembliesWhereChromosomeMd5ChecksumIsNull() {
+ return repository.findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
+ }
+
+    /**
+     * Fetches one page of chromosomes of the given assembly through the repository query
+     * {@code findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty}.
+     * The page is returned exactly as the repository produced it.
+     *
+     * @param asmInsdcAccession INSDC accession of the assembly
+     * @param request           paging information forwarded to the repository
+     * @return the page returned by the repository
+     */
+    public Page getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(String asmInsdcAccession, Pageable request) {
+        return repository.findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(asmInsdcAccession, request);
+    }
+
+ /**
+ * Writes the md5checksum currently held by each given chromosome through the repository, issuing
+ * one {@code updateMd5ChecksumByInsdcAccession} call per chromosome, keyed by the assembly's INSDC
+ * accession and the chromosome's INSDC accession.
+ *
+ * @param chromosomeEntityList chromosomes to persist; {@code getAssembly()} is dereferenced on each
+ * element without a null check
+ */
+ @Transactional
+ public void updateMd5ChecksumForAll(List chromosomeEntityList) {
+ for (ChromosomeEntity chromosome : chromosomeEntityList) {
+ repository.updateMd5ChecksumByInsdcAccession(chromosome.getAssembly().getInsdcAccession(), chromosome.getInsdcAccession(), chromosome.getMd5checksum());
+ }
+ }
+
public Page getChromosomesByAssemblyRefseq(String asmRefseq, Pageable request) {
Page chromosomes = repository.findChromosomeEntitiesByAssembly_Refseq(asmRefseq, request);
return stripAssembliesFromChromosomes(chromosomes);
diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java
index 89de6d66..11f7bd5c 100644
--- a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java
+++ b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java
@@ -56,7 +56,7 @@ public void getAssemblyByAccessionGCAHavingChromosomes() throws IOException {
@Test
public void getENASequenceNamesForAssembly() throws IOException {
Optional assembly = ncbiDataSource.getAssemblyByAccession(GCA_ACCESSION_HAVING_CHROMOSOMES);
- enaDataSource.addENASequenceNamesToAssembly(assembly);
+ // Assert presence before unwrapping, so a missing assembly fails as an assertion rather than in get()
assertTrue(assembly.isPresent());
+ enaDataSource.addENASequenceNamesToAssembly(assembly.get());
assertTrue(enaDataSource.hasAllEnaSequenceNames(assembly.get()));
}
diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java
index 2ba4895a..8c8440df 100644
--- a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java
+++ b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java
@@ -1,5 +1,6 @@
package uk.ac.ebi.eva.contigalias.datasource;
+import com.fasterxml.jackson.databind.JsonNode;
import org.apache.commons.net.ftp.FTPFile;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
@@ -7,10 +8,13 @@
import org.mockito.Mockito;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
+import org.springframework.boot.test.mock.mockito.MockBean;
import org.springframework.retry.annotation.EnableRetry;
import org.springframework.test.context.TestPropertySource;
+import org.springframework.web.client.RestTemplate;
import uk.ac.ebi.eva.contigalias.dus.NCBIBrowser;
import uk.ac.ebi.eva.contigalias.exception.DownloadFailedException;
+import uk.ac.ebi.eva.contigalias.scheduler.Md5ChecksumRetriever;
import java.io.IOException;
import java.nio.file.Path;
@@ -20,6 +24,8 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -38,6 +44,12 @@ public class RetryTest {
@Autowired
private NCBIAssemblyDataSource dataSource;
+ @Autowired
+ private Md5ChecksumRetriever md5ChecksumRetriever;
+
+ @MockBean
+ private RestTemplate restTemplate;
+
@Test
public void fileDownloadSuccessfulTest() throws IOException {
@@ -108,4 +120,20 @@ public void fileDownloadFailedRetryTest2() throws IOException {
verify(ncbiBrowser, times(5)).getGenomeReportDirectory(mockAccession);
}
+
+    /**
+     * Every REST call fails, so the retriever is expected to give up after five attempts and
+     * surface the original failure.
+     */
+    @Test
+    public void retrieveMd5ChecksumRetry() {
+        final String accessionUnderTest = "TEST_ACCESSION";
+        RuntimeException simulatedFailure = new RuntimeException("Simulated network issue");
+        when(restTemplate.getForObject(anyString(), eq(JsonNode.class))).thenThrow(simulatedFailure);
+
+        Md5ChecksumRetriever retrieverSpy = Mockito.spy(md5ChecksumRetriever);
+        RuntimeException failure = Assertions.assertThrows(RuntimeException.class,
+                () -> retrieverSpy.retrieveMd5Checksum(accessionUnderTest));
+
+        assertEquals("Simulated network issue", failure.getMessage());
+        verify(restTemplate, times(5)).getForObject(anyString(), eq(JsonNode.class));
+    }
+
+
}
diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java b/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java
index c9a96857..ccf08e5e 100644
--- a/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java
+++ b/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java
@@ -40,7 +40,7 @@ public static ChromosomeEntity generate(long id) {
.setRefseq("refseq" + id)
.setUcscName("ucsc" + id)
.setEnaSequenceName("ena" + id)
- .setMd5checksum("md5" + id)
+ .setMd5checksum(null)
.setTrunc512checksum("trunc512" + id)
.setAssembly(null);
}
@@ -80,7 +80,7 @@ void generateTest() {
assertTrue(entity.getInsdcAccession().endsWith(sId));
assertTrue(entity.getRefseq().endsWith(sId));
assertTrue(entity.getUcscName().endsWith(sId));
- assertTrue(entity.getMd5checksum().endsWith(sId));
+ assertEquals(null, entity.getMd5checksum());
assertTrue(entity.getTrunc512checksum().endsWith(sId));
assertNull(entity.getAssembly());
}
diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java
index 2d0c8542..d713e2e9 100644
--- a/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java
+++ b/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java
@@ -31,10 +31,12 @@
import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.entitygenerator.AssemblyGenerator;
import uk.ac.ebi.eva.contigalias.repo.AssemblyRepository;
+import uk.ac.ebi.eva.contigalias.scheduler.ChecksumSetter;
import java.io.IOException;
import java.util.List;
import java.util.Optional;
+import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -62,6 +64,7 @@ public class AssemblyServiceIntegrationTest {
void setup() throws IOException {
NCBIAssemblyDataSource mockNcbiDataSource = mock(NCBIAssemblyDataSource.class);
ENAAssemblyDataSource mockEnaDataSource = mock(ENAAssemblyDataSource.class);
+ ChecksumSetter mockChecksumSetter = mock(ChecksumSetter.class);
for (int i = 0; i < entities.length; i++) {
AssemblyEntity generate = AssemblyGenerator.generate(i);
entities[i] = generate;
@@ -69,8 +72,10 @@ void setup() throws IOException {
.thenReturn(Optional.of(generate));
Mockito.when(mockNcbiDataSource.getAssemblyByAccession(generate.getRefseq()))
.thenReturn(Optional.of(generate));
+ Mockito.when(mockChecksumSetter.updateMd5CheckSumForAssemblyAsync(generate.getInsdcAccession()))
+ .thenReturn(new CompletableFuture<>());
}
- service = new AssemblyService(repository, mockNcbiDataSource, mockEnaDataSource);
+ service = new AssemblyService(repository, mockNcbiDataSource, mockEnaDataSource, mockChecksumSetter);
}
@AfterEach
diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java
index 67eda556..e73cda35 100644
--- a/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java
+++ b/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java
@@ -22,6 +22,7 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.data.domain.Page;
+import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.test.context.ActiveProfiles;
@@ -29,6 +30,9 @@
import uk.ac.ebi.eva.contigalias.entitygenerator.AssemblyGenerator;
import uk.ac.ebi.eva.contigalias.entitygenerator.ChromosomeGenerator;
+import java.util.Collections;
+import java.util.List;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -80,6 +84,31 @@ void putChromosomeChecksumsByAccession() {
});
}
+    /** The assembly of the fixture chromosome must come first among assemblies missing a checksum. */
+    @Test
+    void testGetAssemblyWhereChromosomeMd5ChecksumIsNullOrEmpty() {
+        List<String> accessionsMissingChecksum = service.getAssembliesWhereChromosomeMd5ChecksumIsNull();
+        String expectedAccession = entity.getAssembly().getInsdcAccession();
+        assertEquals(expectedAccession, accessionsMissingChecksum.get(0));
+    }
+
+    /** The fixture chromosome is returned for its assembly and still has no MD5 checksum. */
+    @Test
+    void testGetChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull() {
+        String asmAccession = entity.getAssembly().getInsdcAccession();
+        Pageable firstHundred = PageRequest.of(0, 100);
+        Page<ChromosomeEntity> chromosomesMissingChecksum =
+                service.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(asmAccession, firstHundred);
+        assertChromosomePageIdenticalToEntity(chromosomesMissingChecksum);
+        assertEquals(null, chromosomesMissingChecksum.getContent().get(0).getMd5checksum());
+    }
+
+    /** A checksum set on the fixture and written through the service is read back from the database. */
+    @Test
+    void testUpdateMD5ChecksumForAllChromosomesInAssembly() {
+        final String expectedChecksum = "testmd5checksum";
+        entity.setMd5checksum(expectedChecksum);
+        service.updateMd5ChecksumForAll(Collections.singletonList(entity));
+
+        Page<ChromosomeEntity> reloaded =
+                service.getChromosomesByInsdcAccession(entity.getInsdcAccession(), Pageable.unpaged());
+        assertChromosomePageIdenticalToEntity(reloaded);
+        String storedChecksum = reloaded.getContent().get(0).getMd5checksum();
+        assertEquals(expectedChecksum, storedChecksum);
+    }
+
void assertChromosomePageIdenticalToEntity(Page page) {
assertNotNull(page);
assertTrue(page.getTotalElements() > 0);