Skip to content

Commit

Permalink
Merge pull request #41 from enasequence/DCP-3194-covid-analyses
Browse files Browse the repository at this point in the history
retain order when parsing accs from file.
  • Loading branch information
Manishemblebi authored Apr 4, 2022
2 parents b1422df + e2d0fdb commit 8408b70
Show file tree
Hide file tree
Showing 12 changed files with 131 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
image: openjdk:8

variables:
APP_VERSION: "1.0.1"
APP_VERSION: "1.0.2"
APP_NAME: 'ena-file-downloader'
FTP_DIR: "/nfs/ftp/pub/databases/ena/tools"

Expand Down
2 changes: 1 addition & 1 deletion command-line-downloader/distribution/README.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
******************************************************************************/

version:1.0.1
version:1.0.2
Ena File Downloader
Copyright © EMBL 2021 | EMBL-EBI is part of the European Molecular Biology Laboratory

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ public static List<String> accsFromFile(String inputValues) {
}
List<String> accessionIds = Files.lines(Paths.get(inputValues), StandardCharsets.US_ASCII).collect(Collectors.toList());

return new ArrayList<>(accessionIds.stream().map(aRow -> aRow.replace("\"", "").trim()).collect(Collectors.toSet()));
return new ArrayList<>(accessionIds.stream().map(aRow -> aRow.replace("\"", "").trim()).collect(Collectors.toCollection(LinkedHashSet::new)));
}
}
} catch (IOException exception) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@
public class EnaPortalResponse {

@JsonProperty("run_accession")
private String runId;
@JsonAlias("analysis_accession")
private String recordId;
@JsonProperty("experiment_accession")
@JsonAlias({"sample_accession", "study_accession", "analysis_accession", "run_accession"})
private String parentId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
@AllArgsConstructor
public class FileDetail {
private String parentId;
private String runId;
private String recordId;
private String ftpUrl;
private Long bytes;
private String md5;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ private List<FileDetail> createFileDetails(List<EnaPortalResponse> enaPortalResp
List<Long> bytesList = getBytes(enaPortalResponse.getBytes());

for (int i = 0; i < ftpUrlsList.size(); i++) {
fileDetails.add(new FileDetail(enaPortalResponse.getParentId(), enaPortalResponse.getRunId(),
fileDetails.add(new FileDetail(enaPortalResponse.getParentId(), enaPortalResponse.getRecordId(),
ftpUrlsList.get(i), bytesList.get(i), md5List.get(i)));
}
}
Expand Down Expand Up @@ -109,7 +109,7 @@ public long fetchAccessionAndDownload(DownloadFormatEnum format, String download
final ExecutorService executorService = Executors.newFixedThreadPool(Constants.EXECUTOR_THREAD_COUNT);

String accessionField = accessionDetailsMap.get(ACCESSION_FIELD).get(0);
String accessionType = AccessionTypeEnum.getAccessionType(accessionField).name();
AccessionTypeEnum accessionType = AccessionTypeEnum.getAccessionType(accessionField);
List<String> accessions = accessionDetailsMap.get(ACCESSION_LIST);
List<List<String>> accLists = Collections.synchronizedList(Lists.partition(accessions, 10000));
long total = 0;
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.http.impl.client.HttpClientBuilder;
import org.springframework.stereotype.Component;
import uk.ac.ebi.ena.app.constants.Constants;
import uk.ac.ebi.ena.app.menu.enums.AccessionTypeEnum;
import uk.ac.ebi.ena.app.menu.enums.DownloadFormatEnum;
import uk.ac.ebi.ena.app.utils.CommonUtils;
import uk.ac.ebi.ena.backend.dto.FileDetail;
Expand Down Expand Up @@ -197,7 +198,7 @@ private List<String> getAsperaCommandParts(String asperaLocation, FileDetail fil

public Future<FileDownloadStatus> startDownloadAspera(ExecutorService executorService, List<FileDetail> fileDetails,
String asperaLocation, String downloadLocation,
String accessionType, DownloadFormatEnum format, int set) {
AccessionTypeEnum accessionType, DownloadFormatEnum format, int set) {
FileDownloadStatus fileDownloadStatus = new FileDownloadStatus(fileDetails.size(), 0,
new ArrayList<>());
return executorService.submit(() -> {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.springframework.util.Assert;
import org.springframework.util.DigestUtils;
import uk.ac.ebi.ena.app.constants.Constants;
import uk.ac.ebi.ena.app.menu.enums.AccessionTypeEnum;
import uk.ac.ebi.ena.app.menu.enums.DownloadFormatEnum;
import uk.ac.ebi.ena.backend.dto.FileDetail;
import uk.ac.ebi.ena.backend.enums.FileDownloadStatus;
Expand Down Expand Up @@ -95,18 +96,19 @@ public static ProgressBarBuilder getBProgressBar(String s, long size) {
.setUnit("B", 1); // setting the progress bar to use bytes (B) as the unit, with a unit size of 1
}

public static String getFileDownloadPath(String downloadLoc, String accessionType, DownloadFormatEnum format,
public static String getFileDownloadPath(String downloadLoc, AccessionTypeEnum accessionType,
DownloadFormatEnum format,
FileDetail fileDetail) {
switch (accessionType) {
case "RUN":
case RUN:
return downloadLoc + File.separator
+ StringUtils.lowerCase(format.toString()) + File.separator + fileDetail.getRunId();
case "PROJECT":
case "EXPERIMENT":
case "SAMPLE":
+ StringUtils.lowerCase(format.toString()) + File.separator + fileDetail.getRecordId();
case STUDY:
case EXPERIMENT:
case SAMPLE:
return downloadLoc + File.separator
+ StringUtils.lowerCase(format.toString()) + File.separator + fileDetail.getParentId() + File.separator + fileDetail.getRunId();
case "ANALYSIS":
+ StringUtils.lowerCase(format.toString()) + File.separator + fileDetail.getParentId() + File.separator + fileDetail.getRecordId();
case ANALYSIS:
return downloadLoc + File.separator
+ StringUtils.lowerCase(format.toString()) + File.separator + fileDetail.getParentId();
}
Expand Down Expand Up @@ -144,7 +146,7 @@ private void deleteIfPartialFileExists(Path partialFilePath, String partialFileN


public Future<FileDownloadStatus> startDownload(ExecutorService executorService, List<FileDetail> fileDetails,
String downloadLoc, String accessionType,
String downloadLoc, AccessionTypeEnum accessionType,
DownloadFormatEnum format, int set) {
FileDownloadStatus fileDownloadStatus = new FileDownloadStatus(fileDetails.size(), 0, new ArrayList<>());

Expand Down Expand Up @@ -205,6 +207,7 @@ public Future<FileDownloadStatus> startDownload(ExecutorService executorService,
fileDownloaderPath + File.separator + remoteFileName, bytesCopied);
if (isDownloaded) {
fileProgressBar.stepBy(1);
log.info("{} completed.", fileDownloaderPath + File.separator + remoteFileName);
fileDownloadStatus.setSuccesssful(fileDownloadStatus.getSuccesssful() + 1);
} else {
log.error("Failed to download file:{}, experimentId:{}", remoteFileName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.mockito.Mockito;
import org.mockito.junit.MockitoJUnitRunner;
import org.mockito.junit.jupiter.MockitoExtension;
import uk.ac.ebi.ena.app.menu.enums.AccessionTypeEnum;
import uk.ac.ebi.ena.app.menu.enums.DownloadFormatEnum;
import uk.ac.ebi.ena.app.menu.enums.ProtocolEnum;
import uk.ac.ebi.ena.app.utils.CommonUtils;
Expand Down Expand Up @@ -63,7 +64,7 @@ public void testFetchAccessionAndDownloadWhenSuccess() throws ExecutionException
final Future<FileDownloadStatus> mockedFuture = Mockito.mock(Future.class);
when(mockedFuture.get()).thenReturn(new FileDownloadStatus(0, 0, new ArrayList<>()));
Mockito.when(fileDownloaderService.startDownload(Mockito.any(ExecutorService.class), Mockito.any(List.class),
Mockito.any(String.class), Mockito.any(String.class), Mockito.any(DownloadFormatEnum.class),
Mockito.any(String.class), Mockito.any(AccessionTypeEnum.class), Mockito.any(DownloadFormatEnum.class),
Mockito.anyInt())).thenReturn(mockedFuture);
//ACT
accessionDetailsService.fetchAccessionAndDownload(format, downloadLocation, accessionDetailsMap, protocol, asperaLocation, recipientEmailId);
Expand All @@ -72,7 +73,7 @@ public void testFetchAccessionAndDownloadWhenSuccess() throws ExecutionException
Mockito.anyMap());

verify(fileDownloaderService, times(3)).startDownload(Mockito.any(ExecutorService.class), Mockito.anyList(), Mockito.any(String.class),
Mockito.any(String.class), Mockito.any(DownloadFormatEnum.class), Mockito.anyInt());
Mockito.any(AccessionTypeEnum.class), Mockito.any(DownloadFormatEnum.class), Mockito.anyInt());


}
Expand All @@ -82,21 +83,21 @@ private List<EnaPortalResponse> getPortalResponses() {
List<EnaPortalResponse> portalResponses = new ArrayList<>();
EnaPortalResponse enaPortalResponse1 = new EnaPortalResponse();
enaPortalResponse1.setParentId("SRX2000905");
enaPortalResponse1.setRunId("SRR4000583");
enaPortalResponse1.setRecordId("SRR4000583");
enaPortalResponse1.setBytes("1174738707");
enaPortalResponse1.setUrl("ftp.sra.ebi.ac.uk/vol1/fastq/SRR400/003/SRR4000583/SRR4000583.fastq.gz");
enaPortalResponse1.setMd5("a991ce890047ffca760c6de2617b5fec");
portalResponses.add(enaPortalResponse1);
EnaPortalResponse enaPortalResponse2 = new EnaPortalResponse();
enaPortalResponse2.setParentId("SRX6415696");
enaPortalResponse2.setRunId("SRR9654360");
enaPortalResponse2.setRecordId("SRR9654360");
enaPortalResponse2.setBytes("14139836");
enaPortalResponse2.setUrl("ftp.sra.ebi.ac.uk/vol1/fastq/SRR965/000/SRR9654360/SRR9654360.fastq.gz");
enaPortalResponse2.setMd5("f3611f35a977b8b82a7adcf0a28c397d");
portalResponses.add(enaPortalResponse2);
EnaPortalResponse enaPortalResponse3 = new EnaPortalResponse();
enaPortalResponse3.setParentId("SRX6415695");
enaPortalResponse3.setRunId("SRR9654361");
enaPortalResponse3.setRecordId("SRR9654361");
enaPortalResponse3.setBytes("15541843");
enaPortalResponse3.setUrl("ftp.sra.ebi.ac.uk/vol1/fastq/SRR965/001/SRR9654361/SRR9654361.fastq.gz");
enaPortalResponse3.setMd5("1236b79cd93a63289841765aabacb880");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,21 +72,21 @@ private EnaPortalResponse[] getPortalResponses() {
List<EnaPortalResponse> portalResponses = new ArrayList<>();
EnaPortalResponse enaPortalResponse1 = new EnaPortalResponse();
enaPortalResponse1.setParentId("SRX2000905");
enaPortalResponse1.setRunId("SRR4000583");
enaPortalResponse1.setRecordId("SRR4000583");
enaPortalResponse1.setBytes("1174738707");
enaPortalResponse1.setUrl("ftp.sra.ebi.ac.uk/vol1/fastq/SRR400/003/SRR4000583/SRR4000583.fastq.gz");
enaPortalResponse1.setMd5("a991ce890047ffca760c6de2617b5fec");
portalResponses.add(enaPortalResponse1);
EnaPortalResponse enaPortalResponse2 = new EnaPortalResponse();
enaPortalResponse2.setParentId("SRX6415696");
enaPortalResponse2.setRunId("SRR9654360");
enaPortalResponse2.setRecordId("SRR9654360");
enaPortalResponse2.setBytes("14139836");
enaPortalResponse2.setUrl("ftp.sra.ebi.ac.uk/vol1/fastq/SRR965/000/SRR9654360/SRR9654360.fastq.gz");
enaPortalResponse2.setMd5("f3611f35a977b8b82a7adcf0a28c397d");
portalResponses.add(enaPortalResponse2);
EnaPortalResponse enaPortalResponse3 = new EnaPortalResponse();
enaPortalResponse3.setParentId("SRX6415695");
enaPortalResponse3.setRunId("SRR9654361");
enaPortalResponse3.setRecordId("SRR9654361");
enaPortalResponse3.setBytes("15541843");
enaPortalResponse3.setUrl("ftp.sra.ebi.ac.uk/vol1/fastq/SRR965/001/SRR9654361/SRR9654361.fastq.gz");
enaPortalResponse3.setMd5("1236b79cd93a63289841765aabacb880");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.mockito.InjectMocks;
import org.mockito.junit.MockitoJUnitRunner;
import org.mockito.junit.jupiter.MockitoExtension;
import uk.ac.ebi.ena.app.menu.enums.AccessionTypeEnum;
import uk.ac.ebi.ena.app.menu.enums.DownloadFormatEnum;
import uk.ac.ebi.ena.backend.dto.FileDetail;
import uk.ac.ebi.ena.backend.enums.FileDownloadStatus;
Expand Down Expand Up @@ -40,12 +41,12 @@ public void testStartDownload_UsingFtp() throws ExecutionException, InterruptedE
List<FileDetail> fileDetailList = new ArrayList<>();
FileDetail fileDetail = createFileDetailFtp();
fileDetailList.add(fileDetail);
String accessionType = "EXPERIMENT";

DownloadFormatEnum format = DownloadFormatEnum.READS_FASTQ;
int set = 1;
//ACT
FileDownloadStatus fileDownloadStatus = fileDownloaderService.
startDownload(executorService, fileDetailList, downloadFolderPath, accessionType, format, set).get();
startDownload(executorService, fileDetailList, downloadFolderPath, AccessionTypeEnum.EXPERIMENT, format, set).get();
System.out.println(fileDownloadStatus);

}
Expand All @@ -59,12 +60,11 @@ public void testStartDownload_UsingAspera() throws ExecutionException, Interrupt
FileDetail fileDetail = createFileDetailAspera();
fileDetailList.add(fileDetail);
String asperaLocation = "C:\\Users\\suman\\AppData\\Local\\Programs\\Aspera\\Aspera Connect\\";//local aspera connect folder
String accessionType = "EXPERIMENT";
DownloadFormatEnum format = DownloadFormatEnum.READS_FASTQ;
int set = 1;
//ACT
FileDownloadStatus fileDownloadStatus = fileDownloaderClient.startDownloadAspera
(executorService, fileDetailList, asperaLocation, downloadFolderPath, accessionType, format, set).get();
(executorService, fileDetailList, asperaLocation, downloadFolderPath, AccessionTypeEnum.EXPERIMENT, format, set).get();
Assert.assertEquals(1, fileDownloadStatus.getSuccesssful());

}
Expand Down

0 comments on commit 8408b70

Please sign in to comment.