Skip to content

Commit

Permalink
Merge pull request #102 from nitin-ebi/download_file
Browse files Browse the repository at this point in the history
EVA-3018: Quick fixes
  • Loading branch information
nitin-ebi authored Oct 31, 2022
2 parents 746dca6 + c986ddb commit 66f6e3f
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ public Optional<AssemblyEntity> getAssemblyByAccession(String accession) throws
}
return Optional.of(assemblyEntity);
} catch (Exception e) {
logger.info("Could not fetch Assembly Report form ENA for accession " + accession + "Exception: " + e);
logger.warn("Could not fetch Assembly Report from ENA for accession " + accession + "Exception: " + e);
return Optional.empty();
}

Expand All @@ -105,14 +105,14 @@ public Optional<Path> downloadAssemblyReport(ENABrowser enaBrowser, String acces
try {
boolean success = enaBrowser.downloadFTPFile(ftpFilePath, downloadFilePath, ftpFile.getSize());
if (success) {
logger.info("ENA assembly report downloaded successfully");
logger.info("ENA assembly report downloaded successfully for accession "+ accession);
return Optional.of(downloadFilePath);
} else {
logger.info("ENA assembly report could not be downloaded successfully");
logger.warn("ENA assembly report could not be downloaded successfully for accession "+accession);
return Optional.empty();
}
} catch (IOException | DownloadFailedException e) {
logger.info("Error downloading ENA assembly report " + e);
logger.warn("Error downloading ENA assembly report for accession "+ accession + e);
return Optional.empty();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.UnknownHostException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand Down Expand Up @@ -63,12 +62,8 @@ public Optional<AssemblyEntity> getAssemblyByAccession(
String accession) throws IOException, IllegalArgumentException {
NCBIBrowser ncbiBrowser = factory.build();
ncbiBrowser.connect();
Optional<String> directory = ncbiBrowser.getGenomeReportDirectory(accession);
if (!directory.isPresent()) {
return Optional.empty();
}

Optional<Path> downloadFilePath = downloadAssemblyReport(ncbiBrowser, directory.get());
Optional<Path> downloadFilePath = downloadAssemblyReport(accession, ncbiBrowser);
if (!downloadFilePath.isPresent()) {
return Optional.empty();
}
Expand All @@ -77,7 +72,8 @@ public Optional<AssemblyEntity> getAssemblyByAccession(
try (InputStream stream = new FileInputStream(downloadFilePath.get().toFile())) {
NCBIAssemblyReportReader reader = readerFactory.build(stream);
assemblyEntity = reader.getAssemblyEntity();
logger.info("NCBI: Number of chromosomes in " + accession + " : " + assemblyEntity.getChromosomes().size());
logger.info("NCBI: Number of chromosomes in " + accession + " : " +
(assemblyEntity.getChromosomes() != null ? assemblyEntity.getChromosomes().size() : 0));
} finally {
try {
ncbiBrowser.disconnect();
Expand All @@ -89,17 +85,23 @@ public Optional<AssemblyEntity> getAssemblyByAccession(
return Optional.of(assemblyEntity);
}

@Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier=2))
public Optional<Path> downloadAssemblyReport(NCBIBrowser ncbiBrowser, String directory) throws IOException {
FTPFile ftpFile = ncbiBrowser.getNCBIAssemblyReportFile(directory);
String ftpFilePath = directory + ftpFile.getName();
@Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2))
public Optional<Path> downloadAssemblyReport(String accession, NCBIBrowser ncbiBrowser) throws IOException {
Optional<String> directory = ncbiBrowser.getGenomeReportDirectory(accession);
if (!directory.isPresent()) {
return Optional.empty();
}
logger.info("NCBI directory for assembly report download: " + directory.get());

FTPFile ftpFile = ncbiBrowser.getNCBIAssemblyReportFile(directory.get());
String ftpFilePath = directory.get() + ftpFile.getName();
Path downloadFilePath = Paths.get(asmFileDownloadDir, ftpFile.getName());
boolean success = ncbiBrowser.downloadFTPFile(ftpFilePath, downloadFilePath, ftpFile.getSize());
if (success) {
logger.info("NCBI assembly report downloaded successfully");
logger.info("NCBI assembly report downloaded successfully (" + ftpFile.getName() + ")");
return Optional.of(downloadFilePath);
} else {
logger.info("NCBI assembly report could not be downloaded successfully");
logger.error("NCBI assembly report could not be downloaded successfully(" + ftpFile.getName() + ")");
return Optional.empty();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ protected void parseReport() throws IOException, NullPointerException {
parseAssemblyData(line);
} else if (!line.startsWith("#")) {
String[] columns = line.split("\t", -1);
if (columns.length >= 6 && columns[5].equals("=")) {
if (columns.length >= 6 && (columns[5].equals("=") || columns[5].equals("<>")) &&
(columns[4] != null && !columns[4].isEmpty() && !columns[4].equals("na"))) {
if (columns[3].equals("Chromosome") && columns[1].equals("assembled-molecule")) {
parseChromosomeLine(columns);
} else if (isScaffoldsEnabled) {
Expand Down Expand Up @@ -98,7 +99,11 @@ protected void parseChromosomeLine(String[] columns) {

chromosomeEntity.setGenbankSequenceName(columns[0]);
chromosomeEntity.setInsdcAccession(columns[4]);
chromosomeEntity.setRefseq(columns[6]);
if (columns[6] == null || columns[6].isEmpty() || columns[6].equals("na")) {
chromosomeEntity.setRefseq(null);
} else {
chromosomeEntity.setRefseq(columns[6]);
}

if (columns.length > 8) {
try {
Expand Down Expand Up @@ -132,7 +137,11 @@ protected void parseScaffoldLine(String[] columns) {

scaffoldEntity.setGenbankSequenceName(columns[0]);
scaffoldEntity.setInsdcAccession(columns[4]);
scaffoldEntity.setRefseq(columns[6]);
if (columns[6] == null || columns[6].isEmpty() || columns[6].equals("na")) {
scaffoldEntity.setRefseq(null);
} else {
scaffoldEntity.setRefseq(columns[6]);
}

if (columns.length > 8) {
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ public class SequenceEntity {
@ApiModelProperty(value = "Sequence's INSDC accession.")
private String insdcAccession;

@Column(nullable = false)
@ApiModelProperty(value = "Sequence's RefSeq accession.")
private String refseq;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.util.Pair;
import org.springframework.stereotype.Service;
import uk.ac.ebi.eva.contigalias.datasource.ENAAssemblyDataSource;
import uk.ac.ebi.eva.contigalias.datasource.NCBIAssemblyDataSource;
Expand All @@ -31,6 +30,7 @@
import uk.ac.ebi.eva.contigalias.exception.DuplicateAssemblyException;
import uk.ac.ebi.eva.contigalias.repo.AssemblyRepository;

import javax.transaction.Transactional;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
Expand All @@ -40,7 +40,7 @@
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;


@Service
public class AssemblyService {
Expand All @@ -51,8 +51,6 @@ public class AssemblyService {

private final ENAAssemblyDataSource enaDataSource;

private final ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());

private final Logger logger = LoggerFactory.getLogger(AssemblyService.class);

@Autowired
Expand Down Expand Up @@ -102,7 +100,12 @@ public void fetchAndInsertAssembly(String accession) throws IOException {
throw new AssemblyNotFoundException(accession);
}
enaDataSource.addENASequenceNamesToAssembly(fetchAssembly);
fetchAssembly.ifPresent(this::insertAssembly);
if (fetchAssembly.get().getChromosomes() != null && fetchAssembly.get().getChromosomes().size() > 0) {
insertAssembly(fetchAssembly.get());
logger.info("Successfully inserted assembly for accession " + accession);
} else {
logger.error("Skipping inserting assembly : No chromosome in assembly " + accession);
}
}

public Optional<AssemblyEntity> getAssemblyByAccession(String accession) {
Expand Down Expand Up @@ -131,6 +134,7 @@ private void stripAssemblyFromChromosomes(AssemblyEntity assembly) {
}
}

@Transactional
public void insertAssembly(AssemblyEntity entity) {
if (isEntityPresent(entity)) {
throw duplicateAssemblyInsertionException(null, entity);
Expand All @@ -155,15 +159,21 @@ public boolean isEntityPresent(AssemblyEntity entity) {

public Map<String, List<String>> fetchAndInsertAssembly(List<String> accessions) {
Map<String, List<String>> accessionResult = new HashMap<>();
List<Future<Pair<String, String>>> executorResponseList = new ArrayList<>();
accessionResult.put("SUCCESS", new ArrayList<>());
accessionResult.put("FAILURE", new ArrayList<>());

for (String accession : accessions) {
try {
logger.info("Started processing assembly accession : " + accession);
this.fetchAndInsertAssembly(accession);
accessionResult.getOrDefault("SUCCESS", new ArrayList<>()).add(accession);
accessionResult.get("SUCCESS").add(accession);
} catch (Exception e) {
accessionResult.getOrDefault("FAILURE", new ArrayList<>()).add(accession);
logger.error("Exception while loading assembly for accession " + accession + e);
accessionResult.get("FAILURE").add(accession);
}
}
logger.info("Success: " + accessionResult.getOrDefault("SUCCESS", Collections.emptyList()));
logger.info("Failure: " + accessionResult.getOrDefault("FAILURE", Collections.emptyList()));

return accessionResult;
}
Expand Down
26 changes: 23 additions & 3 deletions src/test/java/uk/ac/ebi/eva/contigalias/datasource/RetryTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,37 +41,43 @@ public class RetryTest {

@Test
public void fileDownloadSuccessfulTest() throws IOException {
String mockAccession = "GCA_000012345.1";
String mockDirectory = "/src/test/resources/";
String mockFileName = "mock_file.txt";
Long mockFileSize = 1000l;
when(ncbiBrowser.getGenomeReportDirectory(mockAccession)).thenReturn(Optional.of(mockDirectory));
when(ncbiBrowser.getNCBIAssemblyReportFile(mockDirectory)).thenReturn(ftpFile);
when(ftpFile.getName()).thenReturn(mockFileName);
when(ftpFile.getSize()).thenReturn(mockFileSize);
when(ncbiBrowser.downloadFTPFile(mockDirectory + mockFileName, Paths.get("/tmp/mock_file.txt"), mockFileSize))
.thenReturn(true);
Optional<Path> result = dataSource.downloadAssemblyReport(ncbiBrowser, mockDirectory);
Optional<Path> result = dataSource.downloadAssemblyReport(mockAccession, ncbiBrowser);
assertTrue(result.isPresent());
}

@Test
public void fileDownloadFailedTest() throws IOException {
String mockAccession = "GCA_000012345.1";
String mockDirectory = "/src/test/resources/";
String mockFileName = "mock_file.txt";
Long mockFileSize = 1000l;
when(ncbiBrowser.getGenomeReportDirectory(mockAccession)).thenReturn(Optional.of(mockDirectory));
when(ncbiBrowser.getNCBIAssemblyReportFile(mockDirectory)).thenReturn(ftpFile);
when(ftpFile.getName()).thenReturn(mockFileName);
when(ftpFile.getSize()).thenReturn(mockFileSize);
when(ncbiBrowser.downloadFTPFile(mockDirectory + mockFileName, Paths.get("/tmp/mock_file.txt"), mockFileSize))
.thenReturn(false);
Optional<Path> result = dataSource.downloadAssemblyReport(ncbiBrowser, mockDirectory);
Optional<Path> result = dataSource.downloadAssemblyReport(mockAccession, ncbiBrowser);
assertFalse(result.isPresent());
}

@Test
public void fileDownloadFailedRetryTest() throws IOException {
String mockAccession = "GCA_000012345.1";
String mockDirectory = "/src/test/resources/";
String mockFileName = "mock_file.txt";
Long mockFileSize = 1000l;
when(ncbiBrowser.getGenomeReportDirectory(mockAccession)).thenReturn(Optional.of(mockDirectory));
when(ncbiBrowser.getNCBIAssemblyReportFile(mockDirectory)).thenReturn(ftpFile);
when(ftpFile.getName()).thenReturn(mockFileName);
when(ftpFile.getSize()).thenReturn(mockFileSize);
Expand All @@ -80,12 +86,26 @@ public void fileDownloadFailedRetryTest() throws IOException {

NCBIAssemblyDataSource anotherObjSpy = Mockito.spy(dataSource);
DownloadFailedException thrown = Assertions.assertThrows(DownloadFailedException.class, () -> {
anotherObjSpy.downloadAssemblyReport(ncbiBrowser, mockDirectory);
anotherObjSpy.downloadAssemblyReport(mockAccession, ncbiBrowser);
});
assertEquals("Download Failed", thrown.getMessage());

verify(ncbiBrowser, times(5))
.downloadFTPFile(mockDirectory + mockFileName, Paths.get("/tmp/mock_file.txt"), mockFileSize);
}

@Test
public void fileDownloadFailedRetryTest2() throws IOException {
String mockAccession = "GCA_000012345.1";
when(ncbiBrowser.getGenomeReportDirectory(mockAccession)).thenThrow(new IOException("Error listing files"));

NCBIAssemblyDataSource anotherObjSpy = Mockito.spy(dataSource);
IOException thrown = Assertions.assertThrows(IOException.class, () -> {
anotherObjSpy.downloadAssemblyReport(mockAccession, ncbiBrowser);
});

assertEquals("Error listing files", thrown.getMessage());
verify(ncbiBrowser, times(5)).getGenomeReportDirectory(mockAccession);
}

}

0 comments on commit 66f6e3f

Please sign in to comment.