Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
ke4 committed Nov 3, 2023
2 parents c5e85a0 + 198090b commit fc8e641
Show file tree
Hide file tree
Showing 26 changed files with 4,703 additions and 3,861 deletions.
5 changes: 1 addition & 4 deletions app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,11 @@ dependencies {
implementation 'com.fasterxml.jackson.datatype:jackson-datatype-guava'

implementation "org.cache2k:cache2k-api:${cache2kVersion}"
compileOnly 'org.projectlombok:lombok:1.18.22'
compileOnly 'org.projectlombok:lombok:1.18.24'
annotationProcessor 'org.projectlombok:lombok:1.18.24'
runtimeOnly "org.cache2k:cache2k-core:${cache2kVersion}"
implementation "org.cache2k:cache2k-spring:${cache2kVersion}"

compileOnly 'org.projectlombok:lombok:1.18.24'
annotationProcessor 'org.projectlombok:lombok:1.18.24'

runtimeOnly 'org.apache.tiles:tiles-extras:3.0.8'
// For URL rewrite (see WEB-INF/web.xml), or your Filter will fail with the most confusing error messages
runtimeOnly 'org.tuckey:urlrewritefilter:4.0.3'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
import uk.ac.ebi.atlas.controllers.JsonExceptionHandlingController;
import uk.ac.ebi.atlas.experimentpage.ExperimentAttributesService;
import uk.ac.ebi.atlas.model.experiment.singlecell.SingleCellBaselineExperiment;
import uk.ac.ebi.atlas.search.analytics.AnalyticsSearchService;
import uk.ac.ebi.atlas.search.celltype.CellTypeSearchService;
import uk.ac.ebi.atlas.search.geneids.GeneIdSearchService;
import uk.ac.ebi.atlas.search.geneids.QueryParsingException;
import uk.ac.ebi.atlas.search.organismpart.OrganismPartSearchService;
import uk.ac.ebi.atlas.search.species.SpeciesSearchService;
import uk.ac.ebi.atlas.trader.ExperimentTrader;
import uk.ac.ebi.atlas.utils.StringUtil;
Expand All @@ -46,7 +47,8 @@ public class JsonGeneSearchController extends JsonExceptionHandlingController {
private final ExperimentTrader experimentTrader;
private final ExperimentAttributesService experimentAttributesService;

private final AnalyticsSearchService analyticsSearchService;
private final OrganismPartSearchService organismPartSearchService;
private final CellTypeSearchService cellTypeSearchService;
private final SpeciesSearchService speciesSearchService;

@GetMapping(value = "/json/search", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
Expand Down Expand Up @@ -129,10 +131,11 @@ public String search(@RequestParam MultiValueMap<String, String> requestParams)

// If the query term matches a single Ensembl ID different to the query term, we return it in the response.
// The most common case is a non-Ensembl gene identifier (e.g. Entrez, MGI, ...).
var matchingGeneIds =
(geneIds.get().size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) ?
"(" + String.join(", ", geneIds.get()) + ")" :
"";
var matchingGeneIds = geneIds.filter(
strings -> strings.size() == 1
&& !strings.iterator().next().equals(geneQuery.queryTerm()))
.map(strings -> "(" + String.join(", ", strings) + ")")
.orElse("");

var json = GSON.toJson(
ImmutableMap.of(
Expand Down Expand Up @@ -177,12 +180,10 @@ public String searchForGene(@RequestParam MultiValueMap<String, String> requestP
expressedGeneIdEntries.stream()
// TODO Measure in production if parallelising the stream results in any speedup
// (the more experiments we have the better). BEWARE: just adding parallel() throws! (?)
.flatMap(entry -> entry.getValue().entrySet().stream().map(exp2cells -> {
.flatMap(entry -> entry.getValue().keySet().stream().map(experimentAccession -> {

// Inside this map-within-a-flatMap we unfold expressedGeneIdEntries to triplets of...
var geneId = entry.getKey();
var experimentAccession = exp2cells.getKey();
var cellIds = exp2cells.getValue();

var experimentAttributes =
ImmutableMap.<String, Object>builder().putAll(
Expand All @@ -205,10 +206,11 @@ public String searchForGene(@RequestParam MultiValueMap<String, String> requestP

})).collect(toImmutableList());

var matchingGeneIds = "";
if (geneIds.get().size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) {
matchingGeneIds = "(" + String.join(", ", geneIds.get()) + ")";
}
var matchingGeneIds = geneIds.filter(
strings -> strings.size() == 1
&& !strings.iterator().next().equals(geneQuery.queryTerm()))
.map(strings -> "(" + String.join(", ", strings) + ")")
.orElse("");

return GSON.toJson(
ImmutableMap.of(
Expand Down Expand Up @@ -236,7 +238,7 @@ public Boolean isMarkerGene(@RequestParam MultiValueMap<String, String> requestP
.map(Map.Entry::getKey)
.collect(toImmutableSet()));

return markerGeneFacets != null && markerGeneFacets.size() > 0;
return markerGeneFacets != null && !markerGeneFacets.isEmpty();
}

@GetMapping(value = "/json/gene-search/organism-parts",
Expand All @@ -249,7 +251,10 @@ public Set<String> getOrganismPartBySearchTerm(@RequestParam MultiValueMap<Strin
return ImmutableSet.of();
}

return analyticsSearchService.searchOrganismPart(geneIds.get());
var cellTypes = requestParams.get("cellTypes");

return organismPartSearchService.search(geneIds.get(),
cellTypes != null ? ImmutableSet.copyOf(cellTypes) : ImmutableSet.of());
}

@GetMapping(value = "/json/gene-search/cell-types",
Expand All @@ -262,7 +267,10 @@ public Set<String> getCellTypeBySearchTerm(@RequestParam MultiValueMap<String, S
return ImmutableSet.of();
}

return analyticsSearchService.searchCellType(geneIds.get());
var organismParts = requestParams.get("organismParts");

return cellTypeSearchService.search(geneIds.get(),
organismParts != null ? ImmutableSet.copyOf(organismParts) : ImmutableSet.of());
}

@GetMapping(value = "/json/gene-search/species", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
Expand All @@ -279,7 +287,7 @@ public ImmutableSet<String> getSpeciesByGeneId(@RequestParam MultiValueMap<Strin
private ImmutableList<Map.Entry<String, Map<String, List<String>>>> getMarkerGeneProfileByGeneIds(Optional<ImmutableSet<String>> geneIds) {
// We found expressed gene IDs, let’s get to it now...
var geneIds2ExperimentAndCellIds =
geneSearchService.getCellIdsInExperiments(geneIds.get());
geneSearchService.getCellIdsInExperiments(geneIds.orElse(null));

return geneIds2ExperimentAndCellIds.entrySet().stream()
.filter(entry -> !entry.getValue().isEmpty())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package uk.ac.ebi.atlas.search.analytics;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.solr.client.solrj.SolrQuery;
import org.springframework.stereotype.Component;
Expand All @@ -12,6 +13,9 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
public class AnalyticsSearchDao {
Expand Down Expand Up @@ -43,6 +47,87 @@ public ImmutableSet<String> searchFieldByCellIds(
);
}

/**
 * Looks up the values of the {@code CTW_ORGANISM_PART} field, filtering by cell ID and species.
 *
 * <p>The filters are handed to the map-based {@code searchOutputFieldByInputFieldValues} overload, which
 * leaves out any field whose value set is empty.
 *
 * @param cellIDs cell IDs used as the {@code CELL_ID} filter
 * @param species species used as the {@code CTW_ORGANISM} filter
 * @return the organism part values produced by the delegate query
 */
public ImmutableSet<String> searchOrganismPartsByCellIdsAndSpecies(ImmutableSet<String> cellIDs,
                                                                   ImmutableSet<String> species) {
    return searchOutputFieldByInputFieldValues(
            CTW_ORGANISM_PART,
            ImmutableMap.of(
                    CELL_ID, cellIDs,
                    CTW_ORGANISM, species));
}

/**
 * Looks up the values of the {@code CTW_CELL_TYPE} field, filtering by cell ID, species and organism part.
 *
 * <p>The filters are handed to the map-based {@code searchOutputFieldByInputFieldValues} overload, which
 * leaves out any field whose value set is empty.
 *
 * @param cellIDs       cell IDs used as the {@code CELL_ID} filter
 * @param species       species used as the {@code CTW_ORGANISM} filter
 * @param organismParts organism parts used as the {@code CTW_ORGANISM_PART} filter
 * @return the cell type values produced by the delegate query
 */
public ImmutableSet<String> searchCellTypesByCellIdsAndSpeciesAndOrganismParts(ImmutableSet<String> cellIDs,
                                                                               ImmutableSet<String> species,
                                                                               ImmutableSet<String> organismParts) {
    ImmutableMap<SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField, ImmutableSet<String>> filters =
            ImmutableMap.of(
                    CELL_ID, cellIDs,
                    CTW_ORGANISM, species,
                    CTW_ORGANISM_PART, organismParts);

    return searchOutputFieldByInputFieldValues(CTW_CELL_TYPE, filters);
}

/**
 * Runs a {@code unique(search(...))} streaming query over {@code outputSchemaField}, constrained by every
 * entry of {@code inputParams} that has at least one value.
 *
 * @param outputSchemaField field that is selected, sorted on (ascending) and de-duplicated over
 * @param inputParams       filter fields mapped to their accepted values; entries with an empty value set
 *                          are not added to the query
 * @return the values read back by {@code getSchemaFieldFromStreamQuery} for the output field
 */
private ImmutableSet<String> searchOutputFieldByInputFieldValues(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
        ImmutableMap<SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField, ImmutableSet<String>> inputParams) {
    var solrQueryBuilder = getStreamBuilderForOutputField(outputSchemaField);

    for (var fieldAndValues : inputParams.entrySet()) {
        var values = fieldAndValues.getValue();
        if (values.isEmpty()) {
            // An empty value set means "no constraint on this field"
            continue;
        }
        solrQueryBuilder.addQueryFieldByTerm(fieldAndValues.getKey(), values);
    }

    var allDocsSearchBuilder =
            new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, solrQueryBuilder).returnAllDocs();
    var uniqueOverOutputField = new UniqueStreamBuilder(allDocsSearchBuilder, outputSchemaField.name());

    return getSchemaFieldFromStreamQuery(uniqueOverOutputField, outputSchemaField.name());
}

/**
 * Creates a query builder that selects only {@code outputSchemaField} and sorts on it in ascending order.
 * No query terms are set here; callers add their own filters.
 *
 * @param outputSchemaField the field to put in the field list and to sort by
 * @return the pre-configured query builder
 */
private SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOutputField(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
    var emptyQueryBuilder = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>();

    return emptyQueryBuilder
            .setFieldList(outputSchemaField)
            .sortBy(outputSchemaField, SolrQuery.ORDER.asc);
}

/**
 * Fetches the values of one schema field for the documents whose other field matches the given values.
 *
 * <p>Shape of the streaming expression that is built:
 * <pre>
 * unique(
 *   search(scxa-analytics, q=&lt;inputSchemaField&gt;:&lt;inputValues&gt;,   // e.g. ctw_cell_type
 *          fl="&lt;outputSchemaField&gt;",                               // e.g. cell_id
 *          sort="&lt;outputSchemaField&gt; asc"
 *   ),
 *   over="&lt;outputSchemaField&gt;"
 * )
 * </pre>
 *
 * @param outputSchemaField field whose values are returned
 * @param inputSchemaField  field the query filters on
 * @param inputValues       accepted values for {@code inputSchemaField}
 * @return the values read back by {@code getSchemaFieldFromStreamQuery} for the output field
 */
public ImmutableSet<String> searchOutputFieldByInputFieldValues(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
        ImmutableSet<String> inputValues) {
    var filteredSearchBuilder =
            getStreamBuilderByInputFieldValuesForOutputField(inputSchemaField, inputValues, outputSchemaField);
    var uniqueOverOutputField = new UniqueStreamBuilder(filteredSearchBuilder, outputSchemaField.name());

    return getSchemaFieldFromStreamQuery(uniqueOverOutputField, outputSchemaField.name());
}

/**
 * Builds the {@code search(...)} part of the streaming query: filter on {@code inputSchemaField}, select
 * {@code outputSchemaField}, sort ascending on it, and request all documents.
 *
 * @param inputSchemaField  field the query filters on
 * @param inputValues       accepted values for {@code inputSchemaField}
 * @param outputSchemaField field placed in the field list and used as the sort key
 * @return the search stream builder, configured through {@code returnAllDocs()}
 */
private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByInputFieldValuesForOutputField(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
        ImmutableSet<String> inputValues,
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
    var filteredQueryBuilder = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
            .addQueryFieldByTerm(inputSchemaField, inputValues)
            .setFieldList(outputSchemaField)
            .sortBy(outputSchemaField, SolrQuery.ORDER.asc);

    var searchStreamBuilder = new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, filteredQueryBuilder);
    return searchStreamBuilder.returnAllDocs();
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByCellIdsForSchemaField(
ImmutableSet<String> cellIDs, SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField schemaField) {
return new SearchStreamBuilder<>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.EXPERIMENT_ACCESSION;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.FACET_CHARACTERISTIC_NAME;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.FACET_CHARACTERISTIC_VALUE;
Expand All @@ -37,9 +39,9 @@ public class CellTypeSearchDao {

private final SingleCellAnalyticsCollectionProxy singleCellAnalyticsCollectionProxy;

public CellTypeSearchDao(SolrCloudCollectionProxyFactory solrCloudCollectionProxyFactory) {
public CellTypeSearchDao(SolrCloudCollectionProxyFactory collectionProxyFactory) {
this.singleCellAnalyticsCollectionProxy =
solrCloudCollectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
collectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
}

@Cacheable(cacheNames = "inferredCellTypesOntology", key = "{#experimentAccession, #organOrOrganismPart}")
Expand Down Expand Up @@ -100,7 +102,7 @@ public ImmutableSet<String> getInferredCellTypeAuthorsLabels(String experimentAc
*/
private ImmutableSet<String> getCellTypeMetadata(String experimentAccession,
ImmutableSet<String> organOrOrganismPart,
String cellTypeValue) {
String cellTypeFacetName) {
var cellIdsInOrganOrOrganismPartQueryBuilder =
new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(EXPERIMENT_ACCESSION, experimentAccession)
Expand All @@ -124,8 +126,8 @@ private ImmutableSet<String> getCellTypeMetadata(String experimentAccession,
.addQueryFieldByTerm(EXPERIMENT_ACCESSION, experimentAccession)
.addQueryFieldByTerm(
ImmutableMap.of(
FACET_FACTOR_NAME, ImmutableSet.of(cellTypeValue),
FACET_CHARACTERISTIC_NAME, ImmutableSet.of(cellTypeValue)))
FACET_FACTOR_NAME, ImmutableSet.of(cellTypeFacetName),
FACET_CHARACTERISTIC_NAME, ImmutableSet.of(cellTypeFacetName)))
.setFieldList(ImmutableSet.of(CELL_ID, FACET_FACTOR_VALUE, FACET_CHARACTERISTIC_VALUE))
.sortBy(CELL_ID, SolrQuery.ORDER.asc);
var uniqueCellIdsAnnotatedWithCellTypeValue =
Expand Down Expand Up @@ -162,4 +164,45 @@ private ImmutableSet<String> getCellTypeMetadata(String experimentAccession,
.collect(toImmutableSet());
}
}

/**
 * Fetches the cell types annotated on the given cells, optionally restricted to a set of organism parts.
 *
 * <p>Shape of the streaming expression that is built:
 * <pre>
 * unique(
 *   search(scxa-analytics-v6, q=cell_id:&lt;SET_OF_CELL_IDS&gt; AND ctw_organism_part:&lt;SET_OF_ORGANISM_PART&gt;,
 *          fl="ctw_cell_type",
 *          sort="ctw_cell_type asc"
 *   ),
 *   over="ctw_cell_type"
 * )
 * </pre>
 *
 * @param cellIds       cell IDs to filter on
 * @param organismParts organism parts to filter on; the search builder skips this filter when the set is
 *                      {@code null} or empty
 * @return the cell type values read from the resulting tuples
 */
public ImmutableSet<String> searchCellTypes(ImmutableSet<String> cellIds, ImmutableSet<String> organismParts) {
    var cellTypeSearchBuilder = getStreamBuilderForCellTypeByCellIdsAndOrganismParts(cellIds, organismParts);
    var uniqueCellTypeBuilder = new UniqueStreamBuilder(cellTypeSearchBuilder, CTW_CELL_TYPE.name());

    return getCellTypeFromStreamQuery(uniqueCellTypeBuilder);
}

/**
 * Builds the {@code search(...)} part of the cell type query: filter on {@code CELL_ID}, select
 * {@code CTW_CELL_TYPE} and sort ascending on it, then request all documents.
 *
 * @param cellIDs       cell IDs to filter on
 * @param organismParts organism parts to filter on; no {@code CTW_ORGANISM_PART} term is added when this is
 *                      {@code null} or empty
 * @return the search stream builder, configured through {@code returnAllDocs()}
 */
private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForCellTypeByCellIdsAndOrganismParts(
        ImmutableSet<String> cellIDs, ImmutableSet<String> organismParts) {
    var solrQueryBuilder = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
            .addQueryFieldByTerm(CELL_ID, cellIDs)
            .setFieldList(CTW_CELL_TYPE)
            .sortBy(CTW_CELL_TYPE, SolrQuery.ORDER.asc);

    // The organism part filter is optional
    var noOrganismPartFilter = organismParts == null || organismParts.isEmpty();
    if (!noOrganismPartFilter) {
        solrQueryBuilder.addQueryFieldByTerm(CTW_ORGANISM_PART, organismParts);
    }

    var searchStreamBuilder = new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, solrQueryBuilder);
    return searchStreamBuilder.returnAllDocs();
}

/**
 * Executes the given streaming expression and collects the {@code CTW_CELL_TYPE} value of every tuple that
 * has at least one field.
 *
 * @param uniqueCellTypeStreamBuilder builder of the streaming expression to run
 * @return the collected cell type values
 */
private ImmutableSet<String> getCellTypeFromStreamQuery(UniqueStreamBuilder uniqueCellTypeStreamBuilder) {
    // try-with-resources closes the streamer once the tuples have been collected
    try (TupleStreamer streamer = TupleStreamer.of(uniqueCellTypeStreamBuilder.build())) {
        var tuplesWithFields = streamer.get().filter(tuple -> !tuple.getFields().isEmpty());

        return tuplesWithFields
                .map(tuple -> tuple.getString(CTW_CELL_TYPE.name()))
                .collect(toImmutableSet());
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package uk.ac.ebi.atlas.search.celltype;

import com.google.common.collect.ImmutableSet;
import lombok.RequiredArgsConstructor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import uk.ac.ebi.atlas.search.GeneSearchService;

/**
 * Service that finds the cell types associated with a set of gene IDs, optionally narrowed down to a set of
 * organism parts.
 */
@Component
@RequiredArgsConstructor
public class CellTypeSearchService {

    private static final Logger LOGGER = LoggerFactory.getLogger(CellTypeSearchService.class);

    private final CellTypeSearchDao cellTypeSearchDao;
    private final GeneSearchService geneSearchService;

    /**
     * Searches the cell types for the given genes.
     *
     * <p>The gene IDs are first resolved to cell IDs via {@code GeneSearchService#getCellIdsFromGeneIds}; the
     * cell IDs and organism parts are then passed to {@code CellTypeSearchDao#searchCellTypes}.
     *
     * @param geneIds       gene IDs to search for; when empty, no query is run
     * @param organismParts organism parts forwarded to the DAO as an additional filter
     * @return the matching cell types, or an empty set if {@code geneIds} is empty
     */
    public ImmutableSet<String> search(ImmutableSet<String> geneIds, ImmutableSet<String> organismParts) {
        if (geneIds.isEmpty()) {
            // Messages used to mention "organism part": a copy-paste slip, this service looks up cell types
            LOGGER.warn("Can't query for cell types as no gene IDs were given.");
            return ImmutableSet.of();
        }

        LOGGER.info("Searching cell types for these gene IDs: {}", geneIds.asList());

        return cellTypeSearchDao.searchCellTypes(geneSearchService.getCellIdsFromGeneIds(geneIds), organismParts);
    }
}
Loading

0 comments on commit fc8e641

Please sign in to comment.