Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor cell types endpoint #353

Merged
merged 22 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
ccfe8ff
Pull and update latest submodule changes
ke4 Feb 1, 2023
7fecd55
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Feb 1, 2023
98c88b3
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Feb 14, 2023
d6ce88e
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Mar 10, 2023
c3221ad
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Apr 24, 2023
1bd1389
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 May 12, 2023
5ddb076
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Jun 5, 2023
9b30847
Add organism part and cell type search to analytics search DAO
ke4 Jun 14, 2023
ba67469
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Jul 28, 2023
c90387d
Clean variable naming
ke4 Jul 28, 2023
5f79f72
Add cell types as an input parameter to OrganismPartSearchDao
ke4 Aug 1, 2023
2a61058
Add cell types as an input parameter to OrganismPartSearchService
ke4 Aug 1, 2023
359c117
clean code in JsonGeneSearchController
ke4 Aug 1, 2023
1913dcf
remove unneeded JdbcUtils from CellTypeSearchDaoIT
ke4 Aug 1, 2023
06bdccf
Add cell type query parameter to the JsonGeneSearchController - WIP
ke4 Aug 2, 2023
e90b3ad
Rename test method name and remove unneeded test case
ke4 Aug 3, 2023
88b6621
Merge branch 'feature/refactor_organism_part_endpoint' of https://git…
ke4 Aug 3, 2023
949e760
Add organism parts parameter to CellTypeSearchDao
ke4 Aug 4, 2023
18cef2e
Implement CellTypeSearchService
ke4 Aug 4, 2023
24f6c44
Use new CellTypeSearchService in JsonGeneSearchController
ke4 Aug 4, 2023
af1844c
Remove null value from cell type search results
ke4 Sep 19, 2023
41fea43
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Nov 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,18 @@
import uk.ac.ebi.atlas.controllers.JsonExceptionHandlingController;
import uk.ac.ebi.atlas.experimentpage.ExperimentAttributesService;
import uk.ac.ebi.atlas.model.experiment.singlecell.SingleCellBaselineExperiment;
import uk.ac.ebi.atlas.search.analytics.AnalyticsSearchService;
import uk.ac.ebi.atlas.search.celltype.CellTypeSearchService;
import uk.ac.ebi.atlas.search.geneids.GeneIdSearchService;
import uk.ac.ebi.atlas.search.geneids.QueryParsingException;
import uk.ac.ebi.atlas.search.organismpart.OrganismPartSearchService;
import uk.ac.ebi.atlas.search.species.SpeciesSearchService;
import uk.ac.ebi.atlas.trader.ExperimentTrader;
import uk.ac.ebi.atlas.utils.StringUtil;

import java.util.AbstractMap.SimpleEntry;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import static com.google.common.base.Strings.isNullOrEmpty;
Expand All @@ -46,7 +48,8 @@ public class JsonGeneSearchController extends JsonExceptionHandlingController {
private final ExperimentTrader experimentTrader;
private final ExperimentAttributesService experimentAttributesService;

private final AnalyticsSearchService analyticsSearchService;
private final OrganismPartSearchService organismPartSearchService;
private final CellTypeSearchService cellTypeSearchService;
private final SpeciesSearchService speciesSearchService;

@GetMapping(value = "/json/search", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
Expand Down Expand Up @@ -130,7 +133,7 @@ public String search(@RequestParam MultiValueMap<String, String> requestParams)
// If the query term matches a single Ensembl ID different to the query term, we return it in the response.
// The most common case is a non-Ensembl gene identifier (e.g. Entrez, MGI, ...).
var matchingGeneIds =
(geneIds.get().size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) ?
(Objects.requireNonNull(geneIds.orElse(null)).size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) ?
"(" + String.join(", ", geneIds.get()) + ")" :
"";

Expand Down Expand Up @@ -182,7 +185,6 @@ public String searchForGene(@RequestParam MultiValueMap<String, String> requestP
// Inside this map-within-a-flatMap we unfold expressedGeneIdEntries to triplets of...
var geneId = entry.getKey();
var experimentAccession = exp2cells.getKey();
var cellIds = exp2cells.getValue();

var experimentAttributes =
ImmutableMap.<String, Object>builder().putAll(
Expand Down Expand Up @@ -236,7 +238,7 @@ public Boolean isMarkerGene(@RequestParam MultiValueMap<String, String> requestP
.map(Map.Entry::getKey)
.collect(toImmutableSet()));

return markerGeneFacets != null && markerGeneFacets.size() > 0;
return markerGeneFacets != null && !markerGeneFacets.isEmpty();
ke4 marked this conversation as resolved.
Show resolved Hide resolved
}

@GetMapping(value = "/json/gene-search/organism-parts",
Expand All @@ -249,7 +251,10 @@ public Set<String> getOrganismPartBySearchTerm(@RequestParam MultiValueMap<Strin
return ImmutableSet.of();
}

return analyticsSearchService.searchOrganismPart(geneIds.get());
var cellTypes = requestParams.get("cellTypes");

return organismPartSearchService.search(geneIds.get(),
cellTypes != null ? ImmutableSet.copyOf(cellTypes) : ImmutableSet.of());
}

@GetMapping(value = "/json/gene-search/cell-types",
Expand All @@ -262,7 +267,10 @@ public Set<String> getCellTypeBySearchTerm(@RequestParam MultiValueMap<String, S
return ImmutableSet.of();
}

return analyticsSearchService.searchCellType(geneIds.get());
var organismParts = requestParams.get("organismParts");
upendrakumbham marked this conversation as resolved.
Show resolved Hide resolved

return cellTypeSearchService.search(geneIds.get(),
organismParts != null ? ImmutableSet.copyOf(organismParts) : ImmutableSet.of());
}

@GetMapping(value = "/json/gene-search/species", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package uk.ac.ebi.atlas.search.analytics;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.solr.client.solrj.SolrQuery;
import org.springframework.stereotype.Component;
Expand All @@ -12,6 +13,9 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
public class AnalyticsSearchDao {
Expand Down Expand Up @@ -43,6 +47,87 @@ public ImmutableSet<String> searchFieldByCellIds(
);
}

/**
 * Returns the {@code CTW_ORGANISM_PART} values found for the given cell IDs and species.
 *
 * @param cellIDs cell IDs used as terms for the {@code CELL_ID} field
 * @param species species used as terms for the {@code CTW_ORGANISM} field
 * @return the set produced by the map-based {@code searchOutputFieldByInputFieldValues} overload
 */
public ImmutableSet<String> searchOrganismPartsByCellIdsAndSpecies(ImmutableSet<String> cellIDs,
                                                                   ImmutableSet<String> species) {
    return searchOutputFieldByInputFieldValues(
            CTW_ORGANISM_PART,
            ImmutableMap.of(CELL_ID, cellIDs, CTW_ORGANISM, species));
}

/**
 * Returns the {@code CTW_CELL_TYPE} values found for the given cell IDs, species and organism parts.
 *
 * @param cellIDs       cell IDs used as terms for the {@code CELL_ID} field
 * @param species       species used as terms for the {@code CTW_ORGANISM} field
 * @param organismParts organism parts used as terms for the {@code CTW_ORGANISM_PART} field
 * @return the set produced by the map-based {@code searchOutputFieldByInputFieldValues} overload
 */
public ImmutableSet<String> searchCellTypesByCellIdsAndSpeciesAndOrganismParts(ImmutableSet<String> cellIDs,
                                                                               ImmutableSet<String> species,
                                                                               ImmutableSet<String> organismParts) {
    return searchOutputFieldByInputFieldValues(
            CTW_CELL_TYPE,
            ImmutableMap.of(
                    CELL_ID, cellIDs,
                    CTW_ORGANISM, species,
                    CTW_ORGANISM_PART, organismParts));
}

/**
 * Builds and runs a unique-over-search streaming query for {@code outputSchemaField}, adding one query term
 * per entry of {@code inputParams} whose value set is not empty.
 *
 * @param outputSchemaField field that is requested, sorted on and de-duplicated over
 * @param inputParams       query fields mapped to their terms; entries with an empty term set are skipped
 * @return the values collected by {@code getSchemaFieldFromStreamQuery} for the output field
 */
private ImmutableSet<String> searchOutputFieldByInputFieldValues(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
        ImmutableMap<SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField, ImmutableSet<String>> inputParams) {
    var outputFieldQueryBuilder = getStreamBuilderForOutputField(outputSchemaField);

    for (var fieldAndTerms : inputParams.entrySet()) {
        if (fieldAndTerms.getValue().isEmpty()) {
            // An empty term set means "no constraint on this field", so it is left out of the query.
            continue;
        }
        outputFieldQueryBuilder.addQueryFieldByTerm(fieldAndTerms.getKey(), fieldAndTerms.getValue());
    }

    var allDocsSearchStreamBuilder =
            new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, outputFieldQueryBuilder).returnAllDocs();
    var uniqueOutputFieldStreamBuilder =
            new UniqueStreamBuilder(allDocsSearchStreamBuilder, outputSchemaField.name());

    return getSchemaFieldFromStreamQuery(uniqueOutputFieldStreamBuilder, outputSchemaField.name());
}

/**
 * Creates a Solr query builder whose field list is {@code outputSchemaField} and whose results are sorted
 * ascending on that same field. No query terms are added here.
 *
 * @param outputSchemaField field to request and sort by
 * @return the configured query builder
 */
private SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOutputField(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
    var sortedOutputFieldQueryBuilder =
            new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
                    .setFieldList(outputSchemaField)
                    .sortBy(outputSchemaField, SolrQuery.ORDER.asc);

    return sortedOutputFieldQueryBuilder;
}

/**
 * Returns the values of {@code outputSchemaField} for documents selected by {@code inputValues} on
 * {@code inputSchemaField}.
 *
 * @param outputSchemaField field whose values are returned (for example the cell ID field)
 * @param inputSchemaField  field the query is run against (for example the cell type field)
 * @param inputValues       terms to query {@code inputSchemaField} with
 * @return the values collected by {@code getSchemaFieldFromStreamQuery} for the output field
 */
public ImmutableSet<String> searchOutputFieldByInputFieldValues(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
        ImmutableSet<String> inputValues) {
    // Shape of the streaming expression that gets built:
    // unique(
    //     search(scxa-analytics, q=<input_field>:<SET_OF_INPUT_FIELD_VALUES>,
    //            fl="<output_field>",
    //            sort="<output_field> asc"
    //     ),
    //     over="<output_field>"
    // )
    var searchStreamBuilder =
            getStreamBuilderByInputFieldValuesForOutputField(inputSchemaField, inputValues, outputSchemaField);
    var uniqueOutputFieldStreamBuilder = new UniqueStreamBuilder(searchStreamBuilder, outputSchemaField.name());

    return getSchemaFieldFromStreamQuery(uniqueOutputFieldStreamBuilder, outputSchemaField.name());
}

/**
 * Creates the search stream that queries {@code inputSchemaField} with {@code inputValues}, requests only
 * {@code outputSchemaField}, sorts ascending on it, and is configured via {@code returnAllDocs()}.
 *
 * @param inputSchemaField  field the terms are matched against
 * @param inputValues       terms for {@code inputSchemaField}
 * @param outputSchemaField field to request and sort by
 * @return the configured search stream builder
 */
private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByInputFieldValuesForOutputField(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
        ImmutableSet<String> inputValues,
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
    var outputFieldQueryBuilder =
            new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
                    .addQueryFieldByTerm(inputSchemaField, inputValues)
                    .setFieldList(outputSchemaField)
                    .sortBy(outputSchemaField, SolrQuery.ORDER.asc);

    return new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, outputFieldQueryBuilder).returnAllDocs();
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByCellIdsForSchemaField(
ImmutableSet<String> cellIDs, SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField schemaField) {
return new SearchStreamBuilder<>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.EXPERIMENT_ACCESSION;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.FACET_CHARACTERISTIC_NAME;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.FACET_CHARACTERISTIC_VALUE;
Expand All @@ -37,9 +39,9 @@ public class CellTypeSearchDao {

private final SingleCellAnalyticsCollectionProxy singleCellAnalyticsCollectionProxy;

public CellTypeSearchDao(SolrCloudCollectionProxyFactory solrCloudCollectionProxyFactory) {
public CellTypeSearchDao(SolrCloudCollectionProxyFactory collectionProxyFactory) {
this.singleCellAnalyticsCollectionProxy =
solrCloudCollectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
collectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
}

@Cacheable(cacheNames = "inferredCellTypesOntology", key = "{#experimentAccession, #organOrOrganismPart}")
Expand Down Expand Up @@ -100,7 +102,7 @@ public ImmutableSet<String> getInferredCellTypeAuthorsLabels(String experimentAc
*/
private ImmutableSet<String> getCellTypeMetadata(String experimentAccession,
ImmutableSet<String> organOrOrganismPart,
String cellTypeValue) {
String cellTypeFacetName) {
var cellIdsInOrganOrOrganismPartQueryBuilder =
new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(EXPERIMENT_ACCESSION, experimentAccession)
Expand All @@ -124,8 +126,8 @@ private ImmutableSet<String> getCellTypeMetadata(String experimentAccession,
.addQueryFieldByTerm(EXPERIMENT_ACCESSION, experimentAccession)
.addQueryFieldByTerm(
ImmutableMap.of(
FACET_FACTOR_NAME, ImmutableSet.of(cellTypeValue),
FACET_CHARACTERISTIC_NAME, ImmutableSet.of(cellTypeValue)))
FACET_FACTOR_NAME, ImmutableSet.of(cellTypeFacetName),
FACET_CHARACTERISTIC_NAME, ImmutableSet.of(cellTypeFacetName)))
.setFieldList(ImmutableSet.of(CELL_ID, FACET_FACTOR_VALUE, FACET_CHARACTERISTIC_VALUE))
.sortBy(CELL_ID, SolrQuery.ORDER.asc);
var uniqueCellIdsAnnotatedWithCellTypeValue =
Expand Down Expand Up @@ -162,4 +164,45 @@ private ImmutableSet<String> getCellTypeMetadata(String experimentAccession,
.collect(toImmutableSet());
}
}

/**
 * Returns the {@code CTW_CELL_TYPE} values for the given cell IDs, optionally restricted by organism parts.
 *
 * @param cellIds       cell IDs used as terms for the {@code CELL_ID} field
 * @param organismParts organism parts to restrict by; {@code null} or empty adds no restriction
 * @return the cell types read back from the streaming query
 */
public ImmutableSet<String> searchCellTypes(ImmutableSet<String> cellIds, ImmutableSet<String> organismParts) {
    // Shape of the streaming expression that gets built:
    // unique(
    //     search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS> AND ctw_organism_part:<SET_OF_ORGANISM_PART>,
    //            fl="ctw_cell_type",
    //            sort="ctw_cell_type asc"
    //     ),
    //     over="ctw_cell_type"
    // )
    var cellTypeSearchStreamBuilder =
            getStreamBuilderForCellTypeByCellIdsAndOrganismParts(cellIds, organismParts);
    var uniqueCellTypeStreamBuilder =
            new UniqueStreamBuilder(cellTypeSearchStreamBuilder, CTW_CELL_TYPE.name());

    return getCellTypeFromStreamQuery(uniqueCellTypeStreamBuilder);
}

/**
 * Creates the search stream that queries {@code CELL_ID} with {@code cellIDs}, requests only
 * {@code CTW_CELL_TYPE} and sorts ascending on it. A {@code CTW_ORGANISM_PART} term is added only when
 * {@code organismParts} is neither {@code null} nor empty.
 *
 * @param cellIDs       terms for the {@code CELL_ID} field
 * @param organismParts optional terms for the {@code CTW_ORGANISM_PART} field
 * @return the configured search stream builder, set up via {@code returnAllDocs()}
 */
private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForCellTypeByCellIdsAndOrganismParts(
        ImmutableSet<String> cellIDs, ImmutableSet<String> organismParts) {
    var queryBuilder =
            new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
                    .addQueryFieldByTerm(CELL_ID, cellIDs)
                    .setFieldList(CTW_CELL_TYPE)
                    .sortBy(CTW_CELL_TYPE, SolrQuery.ORDER.asc);

    var noOrganismPartFilter = organismParts == null || organismParts.isEmpty();
    if (!noOrganismPartFilter) {
        queryBuilder.addQueryFieldByTerm(CTW_ORGANISM_PART, organismParts);
    }

    return new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, queryBuilder).returnAllDocs();
}

/**
 * Runs the given unique stream and collects the {@code CTW_CELL_TYPE} string of every tuple that has at
 * least one field. Tuples without fields are dropped before the value is read.
 *
 * @param uniqueCellTypeStreamBuilder builder of the stream to execute
 * @return the collected cell type values
 */
private ImmutableSet<String> getCellTypeFromStreamQuery(UniqueStreamBuilder uniqueCellTypeStreamBuilder) {
    // try-with-resources closes the streamer once the tuples have been collected.
    try (TupleStreamer cellTypeTuples = TupleStreamer.of(uniqueCellTypeStreamBuilder.build())) {
        var tuplesWithFields = cellTypeTuples.get().filter(tuple -> !tuple.getFields().isEmpty());

        return tuplesWithFields
                .map(tuple -> tuple.getString(CTW_CELL_TYPE.name()))
                .collect(toImmutableSet());
    }
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package uk.ac.ebi.atlas.search.celltype;

import com.google.common.collect.ImmutableSet;
import lombok.RequiredArgsConstructor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import uk.ac.ebi.atlas.search.GeneSearchService;

@Component
@RequiredArgsConstructor
public class CellTypeSearchService {

private final CellTypeSearchDao cellTypeSearchDao;
private final GeneSearchService geneSearchService;

private static final Logger LOGGER = LoggerFactory.getLogger(CellTypeSearchService.class);

public ImmutableSet<String> search(ImmutableSet<String> geneIds, ImmutableSet<String> organismParts) {
if (geneIds.isEmpty()) {
ke4 marked this conversation as resolved.
Show resolved Hide resolved
LOGGER.warn("Can't query for organism part as no gene IDs has given.");
return ImmutableSet.of();
}

LOGGER.info("Searching organism parts for this gene ids: {}", geneIds.asList());

return cellTypeSearchDao.searchCellTypes(geneSearchService.getCellIdsFromGeneIds(geneIds), organismParts);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
Expand All @@ -24,27 +25,33 @@ public OrganismPartSearchDao(SolrCloudCollectionProxyFactory collectionProxyFact
collectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
}

public ImmutableSet<String> searchOrganismPart(ImmutableSet<String> cellIDs) {
// Streaming query for getting the organism_part provided by set of cell IDs
public ImmutableSet<String> searchOrganismPart(ImmutableSet<String> cellIDs, ImmutableSet<String> cellTypes) {
// Streaming query for getting the organism_part provided by set of cell IDs and cell types
// unique(
// search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS>,
// search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS> AND ctw_cell_type:<SET_OF_CELL_TYPES>,
// fl="ctw_organism_part",
// sort="ctw_organism_part asc"
// ),
// over="ctw_organism_part"
// )
return getOrganismPartFromStreamQuery(
new UniqueStreamBuilder(getStreamBuilderForOrganismPartByCellIds(cellIDs), CTW_ORGANISM_PART.name()));
new UniqueStreamBuilder(getStreamBuilderForOrganismPartByCellIds(cellIDs, cellTypes), CTW_ORGANISM_PART.name()));
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOrganismPartByCellIds(
ImmutableSet<String> cellIDs) {
ImmutableSet<String> cellIDs, ImmutableSet<String> cellTypes) {
var organismPartQueryBuilder = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_ORGANISM_PART)
.sortBy(CTW_ORGANISM_PART, SolrQuery.ORDER.asc);

if (cellTypes != null && !cellTypes.isEmpty()) {
organismPartQueryBuilder.addQueryFieldByTerm(CTW_CELL_TYPE, cellTypes);
}

return new SearchStreamBuilder<>(
singleCellAnalyticsCollectionProxy,
new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_ORGANISM_PART)
.sortBy(CTW_ORGANISM_PART, SolrQuery.ORDER.asc)
organismPartQueryBuilder
).returnAllDocs();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ public class OrganismPartSearchService {

private static final Logger LOGGER = LoggerFactory.getLogger(OrganismPartSearchService.class);

public ImmutableSet<String> search(ImmutableSet<String> geneIds) {
public ImmutableSet<String> search(ImmutableSet<String> geneIds, ImmutableSet<String> cellTypes) {
if (geneIds.isEmpty()) {
LOGGER.warn("Can't query for organism part as no gene IDs has given.");
return ImmutableSet.of();
}

LOGGER.info("Searching organism parts for this gene ids: {}", geneIds.asList());

return organismPartSearchDao.searchOrganismPart(geneSearchService.getCellIdsFromGeneIds(geneIds));
return organismPartSearchDao.searchOrganismPart(geneSearchService.getCellIdsFromGeneIds(geneIds), cellTypes);
}

}
Loading