Skip to content

Commit

Permalink
Fix bucket index loading for non-variant explorer environments (#123)
Browse files Browse the repository at this point in the history
  • Loading branch information
ramari16 authored Oct 22, 2024
1 parent 89357dc commit 292e66a
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@ public class GenomicProcessorNodeImpl implements GenomicProcessor {

private final VariantService variantService;

private final VariantMetadataIndex variantMetadataIndex;


private final String HOMOZYGOUS_VARIANT = "1/1";
private final String HETEROZYGOUS_VARIANT = "0/1";
Expand All @@ -46,7 +44,6 @@ public GenomicProcessorNodeImpl(String genomicDataDirectory) {
this.genomicDataDirectory = genomicDataDirectory;
this.variantService = new VariantService(genomicDataDirectory);
this.patientVariantJoinHandler = new PatientVariantJoinHandler(variantService);
this.variantMetadataIndex = VariantMetadataIndex.createInstance(genomicDataDirectory);

infoStores = new HashMap<>();
File genomicDataDirectoryFile = new File(this.genomicDataDirectory);
Expand Down Expand Up @@ -402,6 +399,6 @@ public List<InfoColumnMeta> getInfoColumnMeta() {

/**
 * Looks up metadata for the given variant specs.
 *
 * <p>Delegates to {@code variantService.findByMultipleVariantSpec}, which is the single
 * place the lookup is performed for this processor.
 *
 * @param variantList the variant specs to look up
 * @return whatever the variant service returns for the given specs
 *         (presumably keyed by variant spec — confirm against VariantService)
 */
@Override
public Map<String, Set<String>> getVariantMetadata(Collection<String> variantList) {
    // Exactly one return: a second, unreachable return statement would not compile.
    return variantService.findByMultipleVariantSpec(variantList);
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
package edu.harvard.hms.dbmi.avillach.hpds.processing;

import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.BucketIndexBySample;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.VariableVariantMasks;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.VariantMasks;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.VariantStore;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.*;
import edu.harvard.hms.dbmi.avillach.hpds.data.genotype.caching.VariantBucketHolder;
import edu.harvard.hms.dbmi.avillach.hpds.storage.FileBackedByteIndexedStorage;
import edu.harvard.hms.dbmi.avillach.hpds.storage.FileBackedJavaIndexedStorage;
Expand Down Expand Up @@ -38,6 +35,8 @@ public class VariantService {
// Variant index array handed out by getVariantIndex(); starts as null and its length is
// logged as the total variant count by loadGenomicCacheFiles().
private String[] variantIndex = null;
// Bucket index; loadGenomicCacheFiles() builds a new one from the variant store when the
// bucket index file does not exist yet.
private BucketIndexBySample bucketIndex;

// Variant metadata index, created from the genomic data directory in the constructor.
// NOTE(review): loadGenomicCacheFiles() null-checks this field, so createInstance()
// presumably can return null when no metadata index is available — confirm.
private final VariantMetadataIndex variantMetadataIndex;

/**
 * Returns the variant index array held by this service.
 *
 * <p>The backing array itself is returned, not a copy.
 *
 * @return the current variant index array, which may be {@code null} if it has not been set
 */
public String[] getVariantIndex() {
    return this.variantIndex;
}
Expand All @@ -60,6 +59,8 @@ public VariantService(String genomicDataDirectory) {
BUCKET_INDEX_BY_SAMPLE_FILE = genomicDataDirectory + "BucketIndexBySample.javabin";

variantStore = loadVariantStore();

this.variantMetadataIndex = VariantMetadataIndex.createInstance(genomicDataDirectory);
try {
loadGenomicCacheFiles();
} catch (Exception e) {
Expand Down Expand Up @@ -162,7 +163,10 @@ private void loadGenomicCacheFiles() throws FileNotFoundException, IOException,
log.info("Found " + variantIndex.length + " total variants.");
}
}
if(variantStore.getPatientIds().length > 0 && !new File(BUCKET_INDEX_BY_SAMPLE_FILE).exists()) {
// TODO: Skipping the bucket index when there is no variant metadata index is a temporary fix for
// non-variant-explorer environments. Once the bucket index is built as part of the ETL, remove this
// check and use the bucket index for all genomic queries.
if(variantStore.getPatientIds().length > 0 && variantMetadataIndex != null && !new File(BUCKET_INDEX_BY_SAMPLE_FILE).exists()) {
log.info("creating new " + BUCKET_INDEX_BY_SAMPLE_FILE);
bucketIndex = new BucketIndexBySample(variantStore, genomicDataDirectory);
try (
Expand Down Expand Up @@ -204,4 +208,8 @@ public List<VariableVariantMasks> getMasksForDbSnpSpec(String variantName) {
/**
 * Returns the empty bitmask as defined by the underlying variant store.
 *
 * @return the value of {@code variantStore.emptyBitmask()}
 */
public BigInteger emptyBitmask() {
    final BigInteger storeEmptyMask = this.variantStore.emptyBitmask();
    return storeEmptyMask;
}

/**
 * Looks up metadata for the given variant specs in the variant metadata index.
 *
 * <p>{@code loadGenomicCacheFiles()} already treats the variant metadata index as optional
 * (it null-checks the field), so this method does the same: when no index is available it
 * returns an empty map instead of throwing a {@link NullPointerException}.
 *
 * @param variantList the variant specs to look up
 * @return the result of the index lookup, or an empty, immutable map when this service has
 *         no variant metadata index
 */
public Map<String, Set<String>> findByMultipleVariantSpec(Collection<String> variantList) {
    if (variantMetadataIndex == null) {
        // No metadata index in this environment: nothing to look up.
        // Fully qualified so no new import is needed in the file.
        return java.util.Collections.emptyMap();
    }
    return variantMetadataIndex.findByMultipleVariantSpec(variantList);
}
}

0 comments on commit 292e66a

Please sign in to comment.