Skip to content

Commit

Permalink
Refactor organism part endpoint (#352)
Browse files Browse the repository at this point in the history
* Pull and update latest submodule changes

* Add organism part and cell type search to analytics search DAO

* Clean variable naming

* Add cell types as an input parameter to OrganismPartSearchDao

* Add cell types as an input parameter to OrganismPartSearchService

* clean code in JsonGeneSearchController

* Add cell type query parameter to the JsonGeneSearchController - WIP

* Rename test method name and remove unneeded test case

* Clean the code that gets the matching gene ids

* Update submodules

* Update atlas-web-core to the latest version
  • Loading branch information
ke4 authored Nov 1, 2023
1 parent 0108404 commit c464485
Show file tree
Hide file tree
Showing 10 changed files with 357 additions and 83 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import uk.ac.ebi.atlas.search.analytics.AnalyticsSearchService;
import uk.ac.ebi.atlas.search.geneids.GeneIdSearchService;
import uk.ac.ebi.atlas.search.geneids.QueryParsingException;
import uk.ac.ebi.atlas.search.organismpart.OrganismPartSearchService;
import uk.ac.ebi.atlas.search.species.SpeciesSearchService;
import uk.ac.ebi.atlas.trader.ExperimentTrader;
import uk.ac.ebi.atlas.utils.StringUtil;
Expand Down Expand Up @@ -47,6 +48,7 @@ public class JsonGeneSearchController extends JsonExceptionHandlingController {
private final ExperimentAttributesService experimentAttributesService;

private final AnalyticsSearchService analyticsSearchService;
private final OrganismPartSearchService organismPartSearchService;
private final SpeciesSearchService speciesSearchService;

@GetMapping(value = "/json/search", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
Expand Down Expand Up @@ -129,10 +131,11 @@ public String search(@RequestParam MultiValueMap<String, String> requestParams)

// If the query term matches a single Ensembl ID different to the query term, we return it in the response.
// The most common case is a non-Ensembl gene identifier (e.g. Entrez, MGI, ...).
var matchingGeneIds =
(geneIds.get().size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) ?
"(" + String.join(", ", geneIds.get()) + ")" :
"";
var matchingGeneIds = geneIds.filter(
strings -> strings.size() == 1
&& !strings.iterator().next().equals(geneQuery.queryTerm()))
.map(strings -> "(" + String.join(", ", strings) + ")")
.orElse("");

var json = GSON.toJson(
ImmutableMap.of(
Expand Down Expand Up @@ -177,12 +180,10 @@ public String searchForGene(@RequestParam MultiValueMap<String, String> requestP
expressedGeneIdEntries.stream()
// TODO Measure in production if parallelising the stream results in any speedup
// (the more experiments we have the better). BEWARE: just adding parallel() throws! (?)
.flatMap(entry -> entry.getValue().entrySet().stream().map(exp2cells -> {
.flatMap(entry -> entry.getValue().keySet().stream().map(experimentAccession -> {

// Inside this map-within-a-flatMap we unfold expressedGeneIdEntries to triplets of...
var geneId = entry.getKey();
var experimentAccession = exp2cells.getKey();
var cellIds = exp2cells.getValue();

var experimentAttributes =
ImmutableMap.<String, Object>builder().putAll(
Expand All @@ -205,10 +206,11 @@ public String searchForGene(@RequestParam MultiValueMap<String, String> requestP

})).collect(toImmutableList());

var matchingGeneIds = "";
if (geneIds.get().size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) {
matchingGeneIds = "(" + String.join(", ", geneIds.get()) + ")";
}
var matchingGeneIds = geneIds.filter(
strings -> strings.size() == 1
&& !strings.iterator().next().equals(geneQuery.queryTerm()))
.map(strings -> "(" + String.join(", ", strings) + ")")
.orElse("");

return GSON.toJson(
ImmutableMap.of(
Expand Down Expand Up @@ -236,7 +238,7 @@ public Boolean isMarkerGene(@RequestParam MultiValueMap<String, String> requestP
.map(Map.Entry::getKey)
.collect(toImmutableSet()));

return markerGeneFacets != null && markerGeneFacets.size() > 0;
return markerGeneFacets != null && !markerGeneFacets.isEmpty();
}

@GetMapping(value = "/json/gene-search/organism-parts",
Expand All @@ -249,7 +251,10 @@ public Set<String> getOrganismPartBySearchTerm(@RequestParam MultiValueMap<Strin
return ImmutableSet.of();
}

return analyticsSearchService.searchOrganismPart(geneIds.get());
var cellTypes = requestParams.get("cellTypes");

return organismPartSearchService.search(geneIds.get(),
cellTypes != null ? ImmutableSet.copyOf(cellTypes) : ImmutableSet.of());
}

@GetMapping(value = "/json/gene-search/cell-types",
Expand Down Expand Up @@ -279,7 +284,7 @@ public ImmutableSet<String> getSpeciesByGeneId(@RequestParam MultiValueMap<Strin
private ImmutableList<Map.Entry<String, Map<String, List<String>>>> getMarkerGeneProfileByGeneIds(Optional<ImmutableSet<String>> geneIds) {
// We found expressed gene IDs, let’s get to it now...
var geneIds2ExperimentAndCellIds =
geneSearchService.getCellIdsInExperiments(geneIds.get());
geneSearchService.getCellIdsInExperiments(geneIds.orElse(null));

return geneIds2ExperimentAndCellIds.entrySet().stream()
.filter(entry -> !entry.getValue().isEmpty())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package uk.ac.ebi.atlas.search.analytics;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.solr.client.solrj.SolrQuery;
import org.springframework.stereotype.Component;
Expand All @@ -12,6 +13,9 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
public class AnalyticsSearchDao {
Expand Down Expand Up @@ -43,6 +47,87 @@ public ImmutableSet<String> searchFieldByCellIds(
);
}

public ImmutableSet<String> searchOrganismPartsByCellIdsAndSpecies(ImmutableSet<String> cellIDs,
ImmutableSet<String> species) {
var inputParams = ImmutableMap.of(
CELL_ID, cellIDs,
CTW_ORGANISM, species
);

return searchOutputFieldByInputFieldValues(CTW_ORGANISM_PART, inputParams);
}

public ImmutableSet<String> searchCellTypesByCellIdsAndSpeciesAndOrganismParts(ImmutableSet<String> cellIDs,
ImmutableSet<String> species,
ImmutableSet<String> organismParts) {
var inputParams = ImmutableMap.of(
CELL_ID, cellIDs,
CTW_ORGANISM, species,
CTW_ORGANISM_PART, organismParts
);

return searchOutputFieldByInputFieldValues(CTW_CELL_TYPE, inputParams);
}

private ImmutableSet<String> searchOutputFieldByInputFieldValues(
SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
ImmutableMap<SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField, ImmutableSet<String>> inputParams) {
var queryBuilder = getStreamBuilderForOutputField(outputSchemaField);
inputParams.forEach((key, value) -> {
if (!value.isEmpty()) {
queryBuilder.addQueryFieldByTerm(key, value);
}
});

var uniqueSearchStreamBuilder = new UniqueStreamBuilder(
new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, queryBuilder).returnAllDocs(),
outputSchemaField.name());

return getSchemaFieldFromStreamQuery(uniqueSearchStreamBuilder, outputSchemaField.name());
}

private SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOutputField(
SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
return new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.setFieldList(outputSchemaField)
.sortBy(outputSchemaField, SolrQuery.ORDER.asc);
}

public ImmutableSet<String> searchOutputFieldByInputFieldValues(
SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
ImmutableSet<String> inputValues) {
// Streaming query for getting the set of output field values provided by set of input field values
// unique(
// search(scxa-analytics, q=<name_of_input_field>:<SET_OF_INPUT_FIELD_VALUES>, // could be ctw_cell_type
// fl="outputSchemaField", // could be : cell_id
// sort="outputSchemaField asc"
// ),
// over="outputSchemaField"
// )
return getSchemaFieldFromStreamQuery(
new UniqueStreamBuilder(
getStreamBuilderByInputFieldValuesForOutputField(
inputSchemaField, inputValues, outputSchemaField),
outputSchemaField.name()
),
outputSchemaField.name()
);
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByInputFieldValuesForOutputField(
SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
ImmutableSet<String> inputValues,
SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
return new SearchStreamBuilder<>(
singleCellAnalyticsCollectionProxy,
new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(inputSchemaField, inputValues)
.setFieldList(outputSchemaField)
.sortBy(outputSchemaField, SolrQuery.ORDER.asc)
).returnAllDocs();
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByCellIdsForSchemaField(
ImmutableSet<String> cellIDs, SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField schemaField) {
return new SearchStreamBuilder<>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
Expand All @@ -24,27 +25,33 @@ public OrganismPartSearchDao(SolrCloudCollectionProxyFactory collectionProxyFact
collectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
}

public ImmutableSet<String> searchOrganismPart(ImmutableSet<String> cellIDs) {
public ImmutableSet<String> searchOrganismPart(ImmutableSet<String> cellIDs, ImmutableSet<String> cellTypes) {
// Streaming query for getting the organism_part provided by set of cell IDs
// unique(
// search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS>,
// search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS> AND cell_type:<SET_OF_CELL_TYPES>,
// fl="ctw_organism_part",
// sort="ctw_organism_part asc"
// ),
// over="ctw_organism_part"
// )
return getOrganismPartFromStreamQuery(
new UniqueStreamBuilder(getStreamBuilderForOrganismPartByCellIds(cellIDs), CTW_ORGANISM_PART.name()));
new UniqueStreamBuilder(getStreamBuilderForOrganismPartByCellIds(cellIDs, cellTypes), CTW_ORGANISM_PART.name()));
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOrganismPartByCellIds(
ImmutableSet<String> cellIDs) {
ImmutableSet<String> cellIDs, ImmutableSet<String> cellTypes) {
var organismPartQueryBuilder = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_ORGANISM_PART)
.sortBy(CTW_ORGANISM_PART, SolrQuery.ORDER.asc);

if (cellTypes != null && !cellTypes.isEmpty()) {
organismPartQueryBuilder.addQueryFieldByTerm(CTW_CELL_TYPE, cellTypes);
}

return new SearchStreamBuilder<>(
singleCellAnalyticsCollectionProxy,
new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_ORGANISM_PART)
.sortBy(CTW_ORGANISM_PART, SolrQuery.ORDER.asc)
organismPartQueryBuilder
).returnAllDocs();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ public class OrganismPartSearchService {

private static final Logger LOGGER = LoggerFactory.getLogger(OrganismPartSearchService.class);

public ImmutableSet<String> search(ImmutableSet<String> geneIds) {
public ImmutableSet<String> search(ImmutableSet<String> geneIds, ImmutableSet<String> cellTypes) {
if (geneIds.isEmpty()) {
LOGGER.warn("Can't query for organism part as no gene IDs has given.");
return ImmutableSet.of();
}

LOGGER.info("Searching organism parts for this gene ids: {}", geneIds.asList());

return organismPartSearchDao.searchOrganismPart(geneSearchService.getCellIdsFromGeneIds(geneIds));
return organismPartSearchDao.searchOrganismPart(geneSearchService.getCellIdsFromGeneIds(geneIds), cellTypes);
}

}
44 changes: 44 additions & 0 deletions app/src/main/java/uk/ac/ebi/atlas/solr/SingleCellSolrUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package uk.ac.ebi.atlas.solr;

import com.google.common.collect.ImmutableSet;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.stereotype.Component;
import uk.ac.ebi.atlas.solr.cloud.SolrCloudCollectionProxyFactory;
import uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy;
import uk.ac.ebi.atlas.solr.cloud.search.SolrQueryBuilder;

import java.util.Arrays;
import java.util.Random;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;

@Component
public class SingleCellSolrUtils {

private final SingleCellAnalyticsCollectionProxy singleCellAnalyticsCollectionProxy;

private static final int MAX_ROWS = 10000;

public SingleCellSolrUtils(SolrCloudCollectionProxyFactory solrCloudCollectionProxyFactory) {
singleCellAnalyticsCollectionProxy =
solrCloudCollectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
}

public ImmutableSet<String> fetchedRandomCellTypesByCellIDs(ImmutableSet<String> cellIDs, int numberOfCellTypes) {
SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> queryBuilder = new SolrQueryBuilder<>();
queryBuilder
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_CELL_TYPE)
.setRows(MAX_ROWS);

return getRandomCellTypesFromQueryResult(singleCellAnalyticsCollectionProxy.query(queryBuilder).getResults(), numberOfCellTypes);
}

private ImmutableSet<String> getRandomCellTypesFromQueryResult(SolrDocumentList solrDocumentList, int numberOfCellTypes) {
return Arrays.stream(new Random().ints(numberOfCellTypes, 0, solrDocumentList.size()).toArray())
.mapToObj(index -> solrDocumentList.get(index).getFieldValue(CTW_CELL_TYPE.name()).toString())
.collect(toImmutableSet());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import uk.ac.ebi.atlas.search.geneids.GeneIdSearchService;
import uk.ac.ebi.atlas.search.geneids.GeneQuery;
import uk.ac.ebi.atlas.search.geneids.QueryParsingException;
import uk.ac.ebi.atlas.search.organismpart.OrganismPartSearchService;
import uk.ac.ebi.atlas.search.species.SpeciesSearchService;
import uk.ac.ebi.atlas.trader.ExperimentTrader;

Expand Down Expand Up @@ -47,6 +48,10 @@ class JsonGeneSearchControllerIT {
@Mock
private AnalyticsSearchService analyticsSearchServiceMock;

@Mock
private OrganismPartSearchService organismPartSearchServiceMock;


@Inject
private ExperimentTrader experimentTrader;

Expand All @@ -67,6 +72,7 @@ void setUp() {
experimentTrader,
experimentAttributesService,
analyticsSearchServiceMock,
organismPartSearchServiceMock,
speciesSearchService);
}

Expand Down Expand Up @@ -171,7 +177,7 @@ void whenGeneIsAMarkerGeneSearchForItReturnsTrue() {
}

@Test
void whenRequestParamIsEmptyOrganismPartSearchReturnsEmptySet() {
void whenRequestParamIsEmptyOrganismPartSearchReturnsException() {
var requestParams = new LinkedMultiValueMap<String, String>();

when(geneIdSearchServiceMock.getGeneQueryByRequestParams(requestParams))
Expand All @@ -194,7 +200,7 @@ void whenSearchTermIsNotFoundAnyGeneIdsThenOrganismPartSearchReturnsEmptySet() {
.thenReturn(geneQuery);
when(geneIdSearchServiceMock.search(geneQuery))
.thenReturn(Optional.of(ImmutableSet.of()));
when(analyticsSearchServiceMock.searchOrganismPart(ImmutableSet.of()))
when(organismPartSearchServiceMock.search(ImmutableSet.of(), ImmutableSet.of()))
.thenReturn(ImmutableSet.of());

var emptyOrganismPartSet = subject.getOrganismPartBySearchTerm(requestParams);
Expand All @@ -216,7 +222,7 @@ void whenSearchTermIsFoundButNoRelatedCellIdsThenOrganismPartSearchReturnsEmptyS
.thenReturn(geneQuery);
when(geneIdSearchServiceMock.search(geneQuery))
.thenReturn(Optional.of(geneIdsFromService));
when(analyticsSearchServiceMock.searchOrganismPart(geneIdsFromService))
when(organismPartSearchServiceMock.search(geneIdsFromService, ImmutableSet.of()))
.thenReturn(ImmutableSet.of());

var emptyOrganismPartSet = subject.getOrganismPartBySearchTerm(requestParams);
Expand All @@ -239,7 +245,7 @@ void whenSearchTermIsFoundAndThereAreRelatedCellIdsThenReturnsOrganismParts() {
.thenReturn(geneQuery);
when(geneIdSearchServiceMock.search(geneQuery))
.thenReturn(Optional.of(geneIdsFromService));
when(analyticsSearchServiceMock.searchOrganismPart(geneIdsFromService))
when(organismPartSearchServiceMock.search(geneIdsFromService, ImmutableSet.of()))
.thenReturn(ImmutableSet.of(expectedOrganismPart));

var actualOrganismParts = subject.getOrganismPartBySearchTerm(requestParams);
Expand Down Expand Up @@ -335,17 +341,6 @@ void whenRequestParamIsEmptySpeciesSearchReturnsAnException() {
.isThrownBy(() -> subject.getSpeciesByGeneId(requestParams));
}

@Test
void whenRequestParamIsNullSpeciesSearchReturnsAnException() {
LinkedMultiValueMap<String, String> requestParams = null;

when(geneIdSearchServiceMock.getCategoryFromRequestParams(requestParams))
.thenThrow(new QueryParsingException("Error parsing query"));

assertThatExceptionOfType(QueryParsingException.class)
.isThrownBy(() -> subject.getSpeciesByGeneId(requestParams));
}

@Test
void whenGeneIdIsNotPartOfAnyExperimentThenReturnsEmptySetOfSpecies() {
var requestParams = new LinkedMultiValueMap<String, String>();
Expand Down
Loading

0 comments on commit c464485

Please sign in to comment.