diff --git a/pom.xml b/pom.xml index f63b0907..48572f74 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,7 @@ org.springframework.retry spring-retry - 1.2.5.RELEASE + 1.3.1 diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java index 0bc6cdba..3f286d2e 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java @@ -22,7 +22,9 @@ import org.springframework.boot.web.servlet.support.SpringBootServletInitializer; import org.springframework.hateoas.config.EnableHypermediaSupport; import org.springframework.retry.annotation.EnableRetry; +import org.springframework.scheduling.annotation.EnableScheduling; +@EnableScheduling @SpringBootApplication @EnableRetry @EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL) diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java b/src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java new file mode 100644 index 00000000..87df2dd5 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/contigalias/conf/ContigAliasConfiguration.java @@ -0,0 +1,14 @@ +package uk.ac.ebi.eva.contigalias.conf; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.web.client.RestTemplate; + +@Configuration +public class ContigAliasConfiguration { + + @Bean + public RestTemplate getRestTemplate() { + return new RestTemplate(); + } +} diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java index 1312301c..37e2d29a 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminController.java @@ -21,16 +21,20 @@ import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; import org.springframework.web.bind.annotation.DeleteMapping; +import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PutMapping; import org.springframework.web.bind.annotation.RequestBody; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RequestParam; import org.springframework.web.bind.annotation.RestController; +import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; @RequestMapping("/v1/admin") @RestController @@ -87,6 +91,36 @@ public ResponseEntity fetchAndInsertAssemblyByAccession( return new ResponseEntity<>("Accession Processing Result : " + accessionResult, HttpStatus.MULTI_STATUS); } + @ApiOperation(value = "Given an assembly accession, retrieve MD5 checksum for all chromosomes belonging to assembly and update") + @PutMapping(value = "assemblies/{accession}/md5checksum") + public ResponseEntity retrieveAndInsertMd5ChecksumForAssembly(@PathVariable(name = "accession") + @ApiParam(value = "INSDC or RefSeq assembly accession. Eg: " + + "GCA_000001405.10") String asmAccession) { + try { + handler.getAssemblyByAccession(asmAccession); + handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession); + return ResponseEntity.ok("A task has been submitted for updating md5checksum for all chromosomes " + + "in assembly " + asmAccession + ". Depending upon the number of chromosomes present in assembly, " + + "this might take some time to complete"); + } catch (AssemblyNotFoundException e) { + return ResponseEntity.ok("Could not find assembly " + asmAccession + + ". Please insert the assembly first (md5checksum will be updated as part of the insertion process"); + } + } + + @ApiOperation(value = "Retrieve list of assemblies for which MD5 Checksum updates are running/going-to-run ") + @GetMapping(value = "assemblies/md5checksum/status") + public ResponseEntity getMD5ChecksumUpdateTaskStatus() { + Map> md5ChecksumUpdateTasks = handler.getMD5ChecksumUpdateTaskStatus(); + Set runningTasks = md5ChecksumUpdateTasks.get("running"); + Set scheduledTasks = md5ChecksumUpdateTasks.get("scheduled"); + String runningTaskRes = runningTasks == null || runningTasks.isEmpty() ? "No running MD5 checksum update tasks" : + runningTasks.stream().collect(Collectors.joining(",")); + String scheduledTaskRes = scheduledTasks == null || scheduledTasks.isEmpty() ? "No scheduled MD5 checksum update tasks" : + scheduledTasks.stream().collect(Collectors.joining(",")); + return ResponseEntity.ok("running: " + runningTaskRes + "\nscheduled: " + scheduledTaskRes); + } + // This endpoint can be enabled in the future when checksums for assemblies are added to the project. // @ApiOperation(value = "Add MD5 and TRUNC512 checksums to an assembly by accession.", // notes = "Given an INSDC or RefSeq accession along with a MD5 or a TRUNC512 checksum, this endpoint will diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java index 9cc50eb9..2007cd17 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/controller/admin/AdminHandler.java @@ -27,6 +27,8 @@ import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.Set; @Service public class AdminHandler { @@ -46,6 +48,10 @@ public AdminHandler(AssemblyService assemblyService, this.assemblyAssembler = assemblyAssembler; } + public Optional getAssemblyByAccession(String accession) { + return assemblyService.getAssemblyByAccession(accession); + } + public void fetchAndInsertAssemblyByAccession(String accession) throws IOException { assemblyService.fetchAndInsertAssembly(accession); } @@ -54,6 +60,14 @@ public Map> fetchAndInsertAssemblyByAccession(List return assemblyService.fetchAndInsertAssembly(accessions); } + public void retrieveAndInsertMd5ChecksumForAssembly(String accession) { + assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accession); + } + + public Map> getMD5ChecksumUpdateTaskStatus() { + return assemblyService.getMD5ChecksumUpdateTaskStatus(); + } + public void deleteAssemblyByAccession(String accession) { assemblyService.deleteAssemblyByAccession(accession); } diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java b/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java index d739dc7f..37a5c791 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSource.java @@ -123,20 +123,17 @@ public Optional downloadAssemblyReport(ENABrowser enaBrowser, String acces * @param optional {@link AssemblyEntity} to add ENA sequence names to * @throws IOException Passes IOException thrown by {@link #getAssemblyByAccession(String)} */ - public void addENASequenceNamesToAssembly(Optional optional) throws IOException { - if (optional.isPresent()) { - AssemblyEntity targetAssembly = optional.get(); - if (!hasAllEnaSequenceNames(targetAssembly)) { - String insdcAccession = targetAssembly.getInsdcAccession(); - Optional enaAssembly = getAssemblyByAccession(insdcAccession); - - if (enaAssembly.isPresent()) { - AssemblyEntity sourceAssembly = enaAssembly.get(); - addENASequenceNames(Objects.nonNull(sourceAssembly.getChromosomes()) ? - sourceAssembly.getChromosomes() : Collections.emptyList(), - Objects.nonNull(targetAssembly.getChromosomes()) ? - targetAssembly.getChromosomes() : Collections.emptyList()); - } + public void addENASequenceNamesToAssembly(AssemblyEntity targetAssembly) throws IOException { + if (!hasAllEnaSequenceNames(targetAssembly)) { + String insdcAccession = targetAssembly.getInsdcAccession(); + Optional enaAssembly = getAssemblyByAccession(insdcAccession); + + if (enaAssembly.isPresent()) { + AssemblyEntity sourceAssembly = enaAssembly.get(); + addENASequenceNames(Objects.nonNull(sourceAssembly.getChromosomes()) ? + sourceAssembly.getChromosomes() : Collections.emptyList(), + Objects.nonNull(targetAssembly.getChromosomes()) ? + targetAssembly.getChromosomes() : Collections.emptyList()); } } } diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java index 0b6f5bd7..2a1ae338 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java @@ -19,11 +19,16 @@ import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity; import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity; +import java.util.List; + @Repository public interface ChromosomeRepository extends JpaRepository { @@ -35,6 +40,16 @@ public interface ChromosomeRepository extends JpaRepository findChromosomeEntitiesByAssembly_InsdcAccession(String asmInsdcAccession, Pageable request); + @Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '')") + Page findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(@Param("asmInsdcAccession") String asmInsdcAccession, Pageable pageable); + + @Query("SELECT distinct c.assembly.insdcAccession FROM ChromosomeEntity c WHERE c.md5checksum IS NULL OR c.md5checksum = ''") + List findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty(); + + @Modifying + @Query("UPDATE ChromosomeEntity c SET c.md5checksum = :md5Checksum WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession") + void updateMd5ChecksumByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("md5Checksum") String md5Checksum); + Page findChromosomeEntitiesByAssembly_Refseq(String asmRefseq, Pageable request); Page findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(String genbankName, long asmTaxid, Pageable request); diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java new file mode 100644 index 00000000..c1063c50 --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java @@ -0,0 +1,123 @@ +package uk.ac.ebi.eva.contigalias.scheduler; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; +import org.springframework.data.domain.Slice; +import org.springframework.scheduling.annotation.Scheduled; +import org.springframework.stereotype.Component; +import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity; +import uk.ac.ebi.eva.contigalias.service.ChromosomeService; + +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; + +@Component +public class ChecksumSetter { + private final Logger logger = LoggerFactory.getLogger(ChecksumSetter.class); + private final Map> runningMD5ChecksumUpdateTasks = new ConcurrentHashMap<>(); + private Set scheduledToRunMD5ChecksumUpdateTasks = new HashSet<>(); + private int DEFAULT_PAGE_SIZE = 10000; + private ChromosomeService chromosomeService; + private Md5ChecksumRetriever md5ChecksumRetriever; + + @Autowired + public ChecksumSetter(ChromosomeService chromosomeService, Md5ChecksumRetriever md5ChecksumRetriever) { + this.chromosomeService = chromosomeService; + this.md5ChecksumRetriever = md5ChecksumRetriever; + } + + @Scheduled(cron = "0 0 0 ? * TUE") + public void updateMd5CheckSumForAllAssemblies() { + scheduledToRunMD5ChecksumUpdateTasks = new HashSet<>(); + List assemblyList = chromosomeService.getAssembliesWhereChromosomeMd5ChecksumIsNull(); + logger.info("List of assemblies to be updated for MD5 Checksum: " + assemblyList); + scheduledToRunMD5ChecksumUpdateTasks.addAll(assemblyList.stream().collect(Collectors.toSet())); + + for (String assembly : assemblyList) { + CompletableFuture future = updateMd5CheckSumForAssemblyAsync(assembly); + try { + future.get(); + } catch (InterruptedException | ExecutionException e) { + logger.error("Encountered an error when running MD5Checksum update for assembly: " + assembly); + } finally { + scheduledToRunMD5ChecksumUpdateTasks.remove(assembly); + } + } + } + + public CompletableFuture updateMd5CheckSumForAssemblyAsync(String assembly) { + logger.info("Submitted job for updating MD5 Checksum for assembly (asynchronously)"); + // Check if the async task for this assembly is already running + CompletableFuture existingTask = runningMD5ChecksumUpdateTasks.get(assembly); + if (existingTask != null && !existingTask.isDone()) { + logger.info("Async task is still running for assembly: " + assembly); + return existingTask; + } + // Start the async task (removing existing run if present) + runningMD5ChecksumUpdateTasks.remove(assembly); + CompletableFuture future = CompletableFuture.runAsync(() -> { + updateMD5ChecksumForAllChromosomesInAssembly(assembly); + }); + // Store the future in the map for the given assembly + runningMD5ChecksumUpdateTasks.put(assembly, future); + + // check the status of task upon completion and remove from running tasks + future.whenComplete((result, exception) -> { + if (exception != null) { + logger.error("Async task (MD5Checksum setter) failed for assembly: " + assembly, exception); + } else { + logger.info("Async task (MD5Checksum setter) completed successfully for assembly: " + assembly); + } + runningMD5ChecksumUpdateTasks.remove(assembly); + }); + + return future; + } + + public void updateMD5ChecksumForAllChromosomesInAssembly(String assembly) { + logger.info("Trying to update md5checksum for assembly: " + assembly); + Slice chrSlice; + Pageable pageable = PageRequest.of(0, DEFAULT_PAGE_SIZE); + long chromosomeUpdated = 0; + do { + chrSlice = chromosomeService.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(assembly, pageable); + List chromosomeEntityList = chrSlice.getContent(); + updateMd5ChecksumForChromosome(chromosomeEntityList); + + chromosomeUpdated += chromosomeEntityList.size(); + logger.info("Chromosomes Updated till now: " + chromosomeUpdated); + } while (chrSlice.hasNext()); + + logger.info("Updating md5checksum for assembly " + assembly + " completed"); + } + + public void updateMd5ChecksumForChromosome(List chromosomesList) { + chromosomesList.parallelStream().forEach(chromosome -> { + try { + String md5Checksum = md5ChecksumRetriever.retrieveMd5Checksum(chromosome.getInsdcAccession()); + chromosome.setMd5checksum(md5Checksum); + } catch (Exception e) { + logger.info("Could not retrieve md5Checksum for insdc accession: " + chromosome.getInsdcAccession()); + } + }); + + chromosomeService.updateMd5ChecksumForAll(chromosomesList); + } + + public Map> getMD5ChecksumUpdateTaskStatus() { + Map> taskStatus = new HashMap<>(); + taskStatus.put("running", runningMD5ChecksumUpdateTasks.keySet()); + taskStatus.put("scheduled", scheduledToRunMD5ChecksumUpdateTasks); + return taskStatus; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java new file mode 100644 index 00000000..1077f38b --- /dev/null +++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java @@ -0,0 +1,32 @@ +package uk.ac.ebi.eva.contigalias.scheduler; + +import com.fasterxml.jackson.databind.JsonNode; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.retry.annotation.Backoff; +import org.springframework.retry.annotation.Retryable; +import org.springframework.stereotype.Component; +import org.springframework.web.client.RestTemplate; + +@Component +public class Md5ChecksumRetriever { + private final Logger logger = LoggerFactory.getLogger(Md5ChecksumRetriever.class); + private String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER"; + private String INSDC_CHECKSUM_URL = "https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata"; + + private RestTemplate restTemplate; + + @Autowired + public Md5ChecksumRetriever(RestTemplate restTemplate) { + this.restTemplate = restTemplate; + } + + @Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2)) + public String retrieveMd5Checksum(String insdcAccession) { + String apiURL = INSDC_CHECKSUM_URL.replace(INSDC_ACCESSION_PLACE_HOLDER, insdcAccession); + JsonNode jsonResponse = restTemplate.getForObject(apiURL, JsonNode.class); + String md5Checksum = jsonResponse.get("metadata").get("md5").asText(); + return md5Checksum; + } +} diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java b/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java index 58a68d34..e97adc2f 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/AssemblyService.java @@ -29,6 +29,7 @@ import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException; import uk.ac.ebi.eva.contigalias.exception.DuplicateAssemblyException; import uk.ac.ebi.eva.contigalias.repo.AssemblyRepository; +import uk.ac.ebi.eva.contigalias.scheduler.ChecksumSetter; import javax.transaction.Transactional; import java.io.IOException; @@ -38,8 +39,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; +import java.util.Set; @Service @@ -51,14 +51,17 @@ public class AssemblyService { private final ENAAssemblyDataSource enaDataSource; + private final ChecksumSetter checksumSetter; + private final Logger logger = LoggerFactory.getLogger(AssemblyService.class); @Autowired - public AssemblyService( - AssemblyRepository repository, NCBIAssemblyDataSource ncbiDataSource, ENAAssemblyDataSource enaDataSource) { + public AssemblyService(AssemblyRepository repository, NCBIAssemblyDataSource ncbiDataSource, + ENAAssemblyDataSource enaDataSource, ChecksumSetter checksumSetter) { this.repository = repository; this.ncbiDataSource = ncbiDataSource; this.enaDataSource = enaDataSource; + this.checksumSetter = checksumSetter; } public Optional getAssemblyByInsdcAccession(String insdcAccession) { @@ -99,15 +102,30 @@ public void fetchAndInsertAssembly(String accession) throws IOException { if (!fetchAssembly.isPresent()) { throw new AssemblyNotFoundException(accession); } - enaDataSource.addENASequenceNamesToAssembly(fetchAssembly); - if (fetchAssembly.get().getChromosomes() != null && fetchAssembly.get().getChromosomes().size() > 0) { - insertAssembly(fetchAssembly.get()); - logger.info("Successfully inserted assembly for accession " + accession); + if (fetchAssembly.isPresent()) { + AssemblyEntity assemblyEntity = fetchAssembly.get(); + enaDataSource.addENASequenceNamesToAssembly(assemblyEntity); + if (assemblyEntity.getChromosomes() != null && assemblyEntity.getChromosomes().size() > 0) { + insertAssembly(assemblyEntity); + logger.info("Successfully inserted assembly for accession " + accession); + // submit job for retrieving and updating MD5 Checksum for assembly (asynchronously) + checksumSetter.updateMd5CheckSumForAssemblyAsync(accession); + } else { + logger.error("Skipping inserting assembly : No chromosome in assembly " + accession); + } } else { - logger.error("Skipping inserting assembly : No chromosome in assembly " + accession); + logger.error("Could not get assembly from NCBI"); } } + public void retrieveAndInsertMd5ChecksumForAssembly(String assembly) { + checksumSetter.updateMd5CheckSumForAssemblyAsync(assembly); + } + + public Map> getMD5ChecksumUpdateTaskStatus() { + return checksumSetter.getMD5ChecksumUpdateTaskStatus(); + } + public Optional getAssemblyByAccession(String accession) { Optional entity = repository.findAssemblyEntityByAccession(accession); if (entity.isPresent()) { diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java index 93679963..898ae64b 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java @@ -25,6 +25,7 @@ import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity; import uk.ac.ebi.eva.contigalias.repo.ChromosomeRepository; +import javax.transaction.Transactional; import java.util.LinkedList; import java.util.List; @@ -54,6 +55,22 @@ public Page getChromosomesByAssemblyInsdcAccession(String asmI return stripAssembliesFromChromosomes(chromosomes); } + public List getAssembliesWhereChromosomeMd5ChecksumIsNull() { + return repository.findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty(); + } + + public Page getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(String asmInsdcAccession, Pageable request) { + Page chrPage = repository.findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(asmInsdcAccession, request); + return chrPage; + } + + @Transactional + public void updateMd5ChecksumForAll(List chromosomeEntityList) { + for (ChromosomeEntity chromosome : chromosomeEntityList) { + repository.updateMd5ChecksumByInsdcAccession(chromosome.getAssembly().getInsdcAccession(), chromosome.getInsdcAccession(), chromosome.getMd5checksum()); + } + } + public Page getChromosomesByAssemblyRefseq(String asmRefseq, Pageable request) { Page chromosomes = repository.findChromosomeEntitiesByAssembly_Refseq(asmRefseq, request); return stripAssembliesFromChromosomes(chromosomes); diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java index 89de6d66..11f7bd5c 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/ENAAssemblyDataSourceTest.java @@ -56,7 +56,7 @@ public void getAssemblyByAccessionGCAHavingChromosomes() throws IOException { @Test public void getENASequenceNamesForAssembly() throws IOException { Optional assembly = ncbiDataSource.getAssemblyByAccession(GCA_ACCESSION_HAVING_CHROMOSOMES); - enaDataSource.addENASequenceNamesToAssembly(assembly); + enaDataSource.addENASequenceNamesToAssembly(assembly.get()); assertTrue(assembly.isPresent()); assertTrue(enaDataSource.hasAllEnaSequenceNames(assembly.get())); } diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java index 2ba4895a..8c8440df 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java @@ -1,5 +1,6 @@ package uk.ac.ebi.eva.contigalias.datasource; +import com.fasterxml.jackson.databind.JsonNode; import org.apache.commons.net.ftp.FTPFile; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -7,10 +8,13 @@ import org.mockito.Mockito; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; +import org.springframework.boot.test.mock.mockito.MockBean; import org.springframework.retry.annotation.EnableRetry; import org.springframework.test.context.TestPropertySource; +import org.springframework.web.client.RestTemplate; import uk.ac.ebi.eva.contigalias.dus.NCBIBrowser; import uk.ac.ebi.eva.contigalias.exception.DownloadFailedException; +import uk.ac.ebi.eva.contigalias.scheduler.Md5ChecksumRetriever; import java.io.IOException; import java.nio.file.Path; @@ -20,6 +24,8 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -38,6 +44,12 @@ public class RetryTest { @Autowired private NCBIAssemblyDataSource dataSource; + @Autowired + private Md5ChecksumRetriever md5ChecksumRetriever; + + @MockBean + private RestTemplate restTemplate; + @Test public void fileDownloadSuccessfulTest() throws IOException { @@ -108,4 +120,20 @@ public void fileDownloadFailedRetryTest2() throws IOException { verify(ncbiBrowser, times(5)).getGenomeReportDirectory(mockAccession); } + + @Test + public void retrieveMd5ChecksumRetry() { + String insdcAccession = "TEST_ACCESSION"; + when(restTemplate.getForObject(anyString(), eq(JsonNode.class))) + .thenThrow(new RuntimeException("Simulated network issue")); + + Md5ChecksumRetriever anotherObjSpy = Mockito.spy(md5ChecksumRetriever); + RuntimeException thrown = Assertions.assertThrows(RuntimeException.class, () -> { + anotherObjSpy.retrieveMd5Checksum(insdcAccession); + }); + + assertEquals("Simulated network issue", thrown.getMessage()); + verify(restTemplate, times(5)).getForObject(anyString(), eq(JsonNode.class)); + } + } diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java b/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java index c9a96857..ccf08e5e 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/entitygenerator/ChromosomeGenerator.java @@ -40,7 +40,7 @@ public static ChromosomeEntity generate(long id) { .setRefseq("refseq" + id) .setUcscName("ucsc" + id) .setEnaSequenceName("ena" + id) - .setMd5checksum("md5" + id) + .setMd5checksum(null) .setTrunc512checksum("trunc512" + id) .setAssembly(null); } @@ -80,7 +80,7 @@ void generateTest() { assertTrue(entity.getInsdcAccession().endsWith(sId)); assertTrue(entity.getRefseq().endsWith(sId)); assertTrue(entity.getUcscName().endsWith(sId)); - assertTrue(entity.getMd5checksum().endsWith(sId)); + assertEquals(null, entity.getMd5checksum()); assertTrue(entity.getTrunc512checksum().endsWith(sId)); assertNull(entity.getAssembly()); } diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java index 2d0c8542..d713e2e9 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyServiceIntegrationTest.java @@ -31,10 +31,12 @@ import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity; import uk.ac.ebi.eva.contigalias.entitygenerator.AssemblyGenerator; import uk.ac.ebi.eva.contigalias.repo.AssemblyRepository; +import uk.ac.ebi.eva.contigalias.scheduler.ChecksumSetter; import java.io.IOException; import java.util.List; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import java.util.stream.Collectors; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -62,6 +64,7 @@ public class AssemblyServiceIntegrationTest { void setup() throws IOException { NCBIAssemblyDataSource mockNcbiDataSource = mock(NCBIAssemblyDataSource.class); ENAAssemblyDataSource mockEnaDataSource = mock(ENAAssemblyDataSource.class); + ChecksumSetter mockChecksumSetter = mock(ChecksumSetter.class); for (int i = 0; i < entities.length; i++) { AssemblyEntity generate = AssemblyGenerator.generate(i); entities[i] = generate; @@ -69,8 +72,10 @@ void setup() throws IOException { .thenReturn(Optional.of(generate)); Mockito.when(mockNcbiDataSource.getAssemblyByAccession(generate.getRefseq())) .thenReturn(Optional.of(generate)); + Mockito.when(mockChecksumSetter.updateMd5CheckSumForAssemblyAsync(generate.getInsdcAccession())) + .thenReturn(new CompletableFuture<>()); } - service = new AssemblyService(repository, mockNcbiDataSource, mockEnaDataSource); + service = new AssemblyService(repository, mockNcbiDataSource, mockEnaDataSource, mockChecksumSetter); } @AfterEach diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java index 67eda556..e73cda35 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/service/ChromosomeServiceIntegrationTest.java @@ -22,6 +22,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.data.domain.Page; +import org.springframework.data.domain.PageRequest; import org.springframework.data.domain.Pageable; import org.springframework.test.context.ActiveProfiles; @@ -29,6 +30,9 @@ import uk.ac.ebi.eva.contigalias.entitygenerator.AssemblyGenerator; import uk.ac.ebi.eva.contigalias.entitygenerator.ChromosomeGenerator; +import java.util.Collections; +import java.util.List; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -80,6 +84,31 @@ void putChromosomeChecksumsByAccession() { }); } + @Test + void testGetAssemblyWhereChromosomeMd5ChecksumIsNullOrEmpty() { + List asmList = service.getAssembliesWhereChromosomeMd5ChecksumIsNull(); + assertEquals(entity.getAssembly().getInsdcAccession(), asmList.get(0)); + } + + @Test + void testGetChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull() { + Page chrPage = service.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(entity.getAssembly().getInsdcAccession(), PageRequest.of(0, 100)); + assertChromosomePageIdenticalToEntity(chrPage); + assertEquals(null, chrPage.getContent().get(0).getMd5checksum()); + } + + @Test + void testUpdateMD5ChecksumForAllChromosomesInAssembly() { + String testMD5Checksum = "testmd5checksum"; + entity.setMd5checksum(testMD5Checksum); + service.updateMd5ChecksumForAll(Collections.singletonList(entity)); + + Page chrPage = service.getChromosomesByInsdcAccession(entity.getInsdcAccession(), Pageable.unpaged()); + assertChromosomePageIdenticalToEntity(chrPage); + assertEquals(testMD5Checksum, chrPage.getContent().get(0).getMd5checksum()); + + } + void assertChromosomePageIdenticalToEntity(Page page) { assertNotNull(page); assertTrue(page.getTotalElements() > 0);