Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor organism part endpoint #352

Merged
merged 20 commits into from
Nov 1, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
ccfe8ff
Pull and update latest submodule changes
ke4 Feb 1, 2023
7fecd55
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Feb 1, 2023
98c88b3
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Feb 14, 2023
d6ce88e
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Mar 10, 2023
c3221ad
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Apr 24, 2023
1bd1389
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 May 12, 2023
5ddb076
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Jun 5, 2023
9b30847
Add organism part and cell type search to analytics search DAO
ke4 Jun 14, 2023
ba67469
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Jul 28, 2023
c90387d
Clean variable naming
ke4 Jul 28, 2023
5f79f72
Add cell types as an input parameter to OrganismPartSearchDao
ke4 Aug 1, 2023
2a61058
Add cell types as an input parameter to OrganismPartSearchService
ke4 Aug 1, 2023
359c117
clean code in JsonGeneSearchController
ke4 Aug 1, 2023
06bdccf
Add cell type query parameter to the JsonGeneSearchController - WIP
ke4 Aug 2, 2023
e90b3ad
Rename test method name and remove unneeded test case
ke4 Aug 3, 2023
fbf3dea
Clean the code that gets the matching gene ids
ke4 Oct 25, 2023
806900a
Update submodules
ke4 Oct 30, 2023
d15dba7
Update atlas-web-core to the latest version
ke4 Oct 30, 2023
d61e877
Merge branch 'develop' of https://github.com/ebi-gene-expression-grou…
ke4 Oct 30, 2023
1f838c0
Merge branch 'develop' into feature/refactor_organism_part_endpoint
ke4 Nov 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@
import uk.ac.ebi.atlas.search.analytics.AnalyticsSearchService;
import uk.ac.ebi.atlas.search.geneids.GeneIdSearchService;
import uk.ac.ebi.atlas.search.geneids.QueryParsingException;
import uk.ac.ebi.atlas.search.organismpart.OrganismPartSearchService;
import uk.ac.ebi.atlas.search.species.SpeciesSearchService;
import uk.ac.ebi.atlas.trader.ExperimentTrader;
import uk.ac.ebi.atlas.utils.StringUtil;

import java.util.AbstractMap.SimpleEntry;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import static com.google.common.base.Strings.isNullOrEmpty;
Expand All @@ -47,6 +49,7 @@ public class JsonGeneSearchController extends JsonExceptionHandlingController {
private final ExperimentAttributesService experimentAttributesService;

private final AnalyticsSearchService analyticsSearchService;
private final OrganismPartSearchService organismPartSearchService;
private final SpeciesSearchService speciesSearchService;

@GetMapping(value = "/json/search", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
Expand Down Expand Up @@ -130,7 +133,7 @@ public String search(@RequestParam MultiValueMap<String, String> requestParams)
// If the query term matches a single Ensembl ID different to the query term, we return it in the response.
// The most common case is a non-Ensembl gene identifier (e.g. Entrez, MGI, ...).
var matchingGeneIds =
(geneIds.get().size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) ?
(Objects.requireNonNull(geneIds.orElse(null)).size() == 1 && !geneIds.get().iterator().next().equals(geneQuery.queryTerm())) ?
ke4 marked this conversation as resolved.
Show resolved Hide resolved
"(" + String.join(", ", geneIds.get()) + ")" :
"";

Expand Down Expand Up @@ -182,7 +185,6 @@ public String searchForGene(@RequestParam MultiValueMap<String, String> requestP
// Inside this map-within-a-flatMap we unfold expressedGeneIdEntries to triplets of...
var geneId = entry.getKey();
var experimentAccession = exp2cells.getKey();
var cellIds = exp2cells.getValue();

var experimentAttributes =
ImmutableMap.<String, Object>builder().putAll(
Expand Down Expand Up @@ -236,7 +238,7 @@ public Boolean isMarkerGene(@RequestParam MultiValueMap<String, String> requestP
.map(Map.Entry::getKey)
.collect(toImmutableSet()));

return markerGeneFacets != null && markerGeneFacets.size() > 0;
return markerGeneFacets != null && !markerGeneFacets.isEmpty();
upendrakumbham marked this conversation as resolved.
Show resolved Hide resolved
}

@GetMapping(value = "/json/gene-search/organism-parts",
Expand All @@ -249,7 +251,10 @@ public Set<String> getOrganismPartBySearchTerm(@RequestParam MultiValueMap<Strin
return ImmutableSet.of();
}

return analyticsSearchService.searchOrganismPart(geneIds.get());
var cellTypes = requestParams.get("cellTypes");
upendrakumbham marked this conversation as resolved.
Show resolved Hide resolved

return organismPartSearchService.search(geneIds.get(),
cellTypes != null ? ImmutableSet.copyOf(cellTypes) : ImmutableSet.of());
}

@GetMapping(value = "/json/gene-search/cell-types",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package uk.ac.ebi.atlas.search.analytics;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.solr.client.solrj.SolrQuery;
import org.springframework.stereotype.Component;
Expand All @@ -12,6 +13,9 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
public class AnalyticsSearchDao {
Expand Down Expand Up @@ -43,6 +47,87 @@ public ImmutableSet<String> searchFieldByCellIds(
);
}

/**
 * Looks up the organism parts of the documents matching the given cell IDs and species.
 * <p>
 * The lookup is delegated to the map-based {@code searchOutputFieldByInputFieldValues} helper, which
 * does not add a query clause for an empty set; an empty {@code cellIDs} or {@code species} therefore
 * leaves that field unconstrained rather than matching nothing.
 *
 * @param cellIDs cell IDs matched against the {@code CELL_ID} field
 * @param species species matched against the {@code CTW_ORGANISM} field
 * @return the {@code CTW_ORGANISM_PART} values found by the query
 */
public ImmutableSet<String> searchOrganismPartsByCellIdsAndSpecies(ImmutableSet<String> cellIDs,
                                                                   ImmutableSet<String> species) {
    return searchOutputFieldByInputFieldValues(
            CTW_ORGANISM_PART,
            ImmutableMap.of(
                    CELL_ID, cellIDs,
                    CTW_ORGANISM, species));
}

/**
 * Looks up the cell types of the documents matching the given cell IDs, species and organism parts.
 * <p>
 * The lookup is delegated to the map-based {@code searchOutputFieldByInputFieldValues} helper, which
 * does not add a query clause for an empty set; any empty argument therefore leaves its field
 * unconstrained rather than matching nothing.
 *
 * @param cellIDs       cell IDs matched against the {@code CELL_ID} field
 * @param species       species matched against the {@code CTW_ORGANISM} field
 * @param organismParts organism parts matched against the {@code CTW_ORGANISM_PART} field
 * @return the {@code CTW_CELL_TYPE} values found by the query
 */
public ImmutableSet<String> searchCellTypesByCellIdsAndSpeciesAndOrganismParts(ImmutableSet<String> cellIDs,
                                                                               ImmutableSet<String> species,
                                                                               ImmutableSet<String> organismParts) {
    return searchOutputFieldByInputFieldValues(
            CTW_CELL_TYPE,
            ImmutableMap.of(
                    CELL_ID, cellIDs,
                    CTW_ORGANISM, species,
                    CTW_ORGANISM_PART, organismParts));
}

/**
 * Runs a unique-over-search streaming query that returns the values of one output field for the
 * documents matching every non-empty input filter.
 *
 * @param outputSchemaField field whose values are returned; the query is also sorted by it
 * @param inputParams       field-to-terms filters; entries with an empty term set are skipped
 * @return the output field values extracted from the stream result
 */
private ImmutableSet<String> searchOutputFieldByInputFieldValues(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
        ImmutableMap<SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField, ImmutableSet<String>> inputParams) {
    var solrQuery = getStreamBuilderForOutputField(outputSchemaField);

    for (var filter : inputParams.entrySet()) {
        var terms = filter.getValue();
        // An empty term set means "no constraint on this field", so no clause is added for it
        if (terms.isEmpty()) {
            continue;
        }
        solrQuery.addQueryFieldByTerm(filter.getKey(), terms);
    }

    var allMatchingDocs =
            new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, solrQuery).returnAllDocs();
    var uniqueOutputValues = new UniqueStreamBuilder(allMatchingDocs, outputSchemaField.name());

    return getSchemaFieldFromStreamQuery(uniqueOutputValues, outputSchemaField.name());
}

/**
 * Creates a query builder that requests only the given field and sorts ascending by it.
 * Callers add their own query clauses afterwards.
 *
 * @param outputSchemaField the field to return and sort by
 * @return a query builder with field list and sort order set, and no query clauses yet
 */
private SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOutputField(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
    SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> sortedSingleFieldQuery =
            new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
                    .setFieldList(outputSchemaField)
                    .sortBy(outputSchemaField, SolrQuery.ORDER.asc);

    return sortedSingleFieldQuery;
}

/**
 * Returns the values of {@code outputSchemaField} for the documents whose {@code inputSchemaField}
 * matches the given values.
 *
 * @param outputSchemaField field whose values are returned
 * @param inputSchemaField  field the input values are matched against
 * @param inputValues       terms to search for in {@code inputSchemaField}
 * @return the output field values extracted from the stream result
 */
public ImmutableSet<String> searchOutputFieldByInputFieldValues(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField,
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
        ImmutableSet<String> inputValues) {
    // Streaming expression built below (e.g. input field ctw_cell_type, output field cell_id):
    // unique(
    //     search(scxa-analytics, q=<input_field>:<SET_OF_INPUT_FIELD_VALUES>,
    //            fl="<output_field>",
    //            sort="<output_field> asc"
    //     ),
    //     over="<output_field>"
    // )
    var searchStream =
            getStreamBuilderByInputFieldValuesForOutputField(inputSchemaField, inputValues, outputSchemaField);
    var uniqueOverOutputField = new UniqueStreamBuilder(searchStream, outputSchemaField.name());

    return getSchemaFieldFromStreamQuery(uniqueOverOutputField, outputSchemaField.name());
}

/**
 * Builds the search stream that matches {@code inputValues} against {@code inputSchemaField},
 * returns only {@code outputSchemaField}, sorts ascending by it and asks for all documents.
 *
 * @param inputSchemaField  field the input values are matched against
 * @param inputValues       terms to search for
 * @param outputSchemaField field to return and sort by
 * @return the configured search stream builder
 */
private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByInputFieldValuesForOutputField(
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField inputSchemaField,
        ImmutableSet<String> inputValues,
        SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField outputSchemaField) {
    var outputFieldQuery = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
            .addQueryFieldByTerm(inputSchemaField, inputValues)
            .setFieldList(outputSchemaField)
            .sortBy(outputSchemaField, SolrQuery.ORDER.asc);

    return new SearchStreamBuilder<>(singleCellAnalyticsCollectionProxy, outputFieldQuery).returnAllDocs();
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderByCellIdsForSchemaField(
ImmutableSet<String> cellIDs, SingleCellAnalyticsCollectionProxy.SingleCellAnalyticsSchemaField schemaField) {
return new SearchStreamBuilder<>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_ORGANISM_PART;

@Component
Expand All @@ -24,27 +25,33 @@ public OrganismPartSearchDao(SolrCloudCollectionProxyFactory collectionProxyFact
collectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
}

public ImmutableSet<String> searchOrganismPart(ImmutableSet<String> cellIDs) {
public ImmutableSet<String> searchOrganismPart(ImmutableSet<String> cellIDs, ImmutableSet<String> cellTypes) {
// Streaming query for getting the organism parts for a set of cell IDs, narrowed to the given cell types when any are provided
// unique(
// search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS>,
// search(scxa-analytics-v6, q=cell_id:<SET_OF_CELL_IDS> AND cell_type:<SET_OF_CELL_TYPES>,
// fl="ctw_organism_part",
// sort="ctw_organism_part asc"
// ),
// over="ctw_organism_part"
// )
return getOrganismPartFromStreamQuery(
new UniqueStreamBuilder(getStreamBuilderForOrganismPartByCellIds(cellIDs), CTW_ORGANISM_PART.name()));
new UniqueStreamBuilder(getStreamBuilderForOrganismPartByCellIds(cellIDs, cellTypes), CTW_ORGANISM_PART.name()));
}

private SearchStreamBuilder<SingleCellAnalyticsCollectionProxy> getStreamBuilderForOrganismPartByCellIds(
ImmutableSet<String> cellIDs) {
ImmutableSet<String> cellIDs, ImmutableSet<String> cellTypes) {
var organismPartQueryBuilder = new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_ORGANISM_PART)
.sortBy(CTW_ORGANISM_PART, SolrQuery.ORDER.asc);

if (cellTypes != null && !cellTypes.isEmpty()) {
organismPartQueryBuilder.addQueryFieldByTerm(CTW_CELL_TYPE, cellTypes);
}

return new SearchStreamBuilder<>(
singleCellAnalyticsCollectionProxy,
new SolrQueryBuilder<SingleCellAnalyticsCollectionProxy>()
.addQueryFieldByTerm(CELL_ID, cellIDs)
.setFieldList(CTW_ORGANISM_PART)
.sortBy(CTW_ORGANISM_PART, SolrQuery.ORDER.asc)
organismPartQueryBuilder
).returnAllDocs();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ public class OrganismPartSearchService {

private static final Logger LOGGER = LoggerFactory.getLogger(OrganismPartSearchService.class);

public ImmutableSet<String> search(ImmutableSet<String> geneIds) {
public ImmutableSet<String> search(ImmutableSet<String> geneIds, ImmutableSet<String> cellTypes) {
if (geneIds.isEmpty()) {
LOGGER.warn("Can't query for organism part as no gene IDs has given.");
return ImmutableSet.of();
}

LOGGER.info("Searching organism parts for this gene ids: {}", geneIds.asList());

return organismPartSearchDao.searchOrganismPart(geneSearchService.getCellIdsFromGeneIds(geneIds));
return organismPartSearchDao.searchOrganismPart(geneSearchService.getCellIdsFromGeneIds(geneIds), cellTypes);
}

}
44 changes: 44 additions & 0 deletions app/src/main/java/uk/ac/ebi/atlas/solr/SingleCellSolrUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package uk.ac.ebi.atlas.solr;

import com.google.common.collect.ImmutableSet;
import org.apache.solr.common.SolrDocumentList;
import org.springframework.stereotype.Component;
import uk.ac.ebi.atlas.solr.cloud.SolrCloudCollectionProxyFactory;
import uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy;
import uk.ac.ebi.atlas.solr.cloud.search.SolrQueryBuilder;

import java.util.Arrays;
import java.util.Random;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CELL_ID;
import static uk.ac.ebi.atlas.solr.cloud.collections.SingleCellAnalyticsCollectionProxy.CTW_CELL_TYPE;

/**
 * Helper around the single-cell analytics Solr collection that picks random cell types
 * for a given set of cell IDs.
 */
@Component
public class SingleCellSolrUtils {

    // Maximum number of documents requested from Solr per query
    private static final int MAX_ROWS = 10000;

    private final SingleCellAnalyticsCollectionProxy singleCellAnalyticsCollectionProxy;

    public SingleCellSolrUtils(SolrCloudCollectionProxyFactory solrCloudCollectionProxyFactory) {
        singleCellAnalyticsCollectionProxy =
                solrCloudCollectionProxyFactory.create(SingleCellAnalyticsCollectionProxy.class);
    }

    /**
     * Queries the documents of the given cell IDs (at most {@code MAX_ROWS} of them) and returns
     * cell types taken from randomly chosen documents.
     * <p>
     * Documents are drawn with replacement and the result is a set, so it may contain fewer than
     * {@code numberOfCellTypes} elements. It is empty when the query matches no documents.
     *
     * @param cellIDs           cell IDs matched against the {@code CELL_ID} field
     * @param numberOfCellTypes number of random draws to make; must not be negative
     * @return the cell types of the drawn documents, without duplicates
     * @throws IllegalArgumentException if {@code numberOfCellTypes} is negative and the query
     *                                  returned at least one document
     */
    public ImmutableSet<String> fetchedRandomCellTypesByCellIDs(ImmutableSet<String> cellIDs, int numberOfCellTypes) {
        SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> queryBuilder = new SolrQueryBuilder<>();
        queryBuilder
                .addQueryFieldByTerm(CELL_ID, cellIDs)
                .setFieldList(CTW_CELL_TYPE)
                .setRows(MAX_ROWS);

        var matchingDocuments = singleCellAnalyticsCollectionProxy.query(queryBuilder).getResults();

        return getRandomCellTypesFromQueryResult(matchingDocuments, numberOfCellTypes);
    }

    /**
     * Draws {@code numberOfCellTypes} random documents (with replacement) from the list and
     * collects their cell type values, skipping documents that have no value for that field.
     */
    private ImmutableSet<String> getRandomCellTypesFromQueryResult(SolrDocumentList solrDocumentList, int numberOfCellTypes) {
        // Random.ints(n, 0, bound) requires bound > 0, so an empty result has to short-circuit
        if (solrDocumentList.isEmpty()) {
            return ImmutableSet.of();
        }

        return new Random()
                .ints(numberOfCellTypes, 0, solrDocumentList.size())
                .mapToObj(index -> solrDocumentList.get(index).getFieldValue(CTW_CELL_TYPE.name()))
                // A document without a cell type yields null here; skip it instead of failing on toString()
                .filter(cellType -> cellType != null)
                .map(Object::toString)
                .collect(toImmutableSet());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import uk.ac.ebi.atlas.search.geneids.GeneIdSearchService;
import uk.ac.ebi.atlas.search.geneids.GeneQuery;
import uk.ac.ebi.atlas.search.geneids.QueryParsingException;
import uk.ac.ebi.atlas.search.organismpart.OrganismPartSearchService;
import uk.ac.ebi.atlas.search.species.SpeciesSearchService;
import uk.ac.ebi.atlas.trader.ExperimentTrader;

Expand Down Expand Up @@ -47,6 +48,10 @@ class JsonGeneSearchControllerIT {
@Mock
private AnalyticsSearchService analyticsSearchServiceMock;

@Mock
private OrganismPartSearchService organismPartSearchServiceMock;


@Inject
private ExperimentTrader experimentTrader;

Expand All @@ -67,6 +72,7 @@ void setUp() {
experimentTrader,
experimentAttributesService,
analyticsSearchServiceMock,
organismPartSearchServiceMock,
speciesSearchService);
}

Expand Down Expand Up @@ -171,7 +177,7 @@ void whenGeneIsAMarkerGeneSearchForItReturnsTrue() {
}

@Test
void whenRequestParamIsEmptyOrganismPartSearchReturnsEmptySet() {
void whenRequestParamIsEmptyOrganismPartSearchReturnsException() {
var requestParams = new LinkedMultiValueMap<String, String>();

when(geneIdSearchServiceMock.getGeneQueryByRequestParams(requestParams))
Expand All @@ -194,7 +200,7 @@ void whenSearchTermIsNotFoundAnyGeneIdsThenOrganismPartSearchReturnsEmptySet() {
.thenReturn(geneQuery);
when(geneIdSearchServiceMock.search(geneQuery))
.thenReturn(Optional.of(ImmutableSet.of()));
when(analyticsSearchServiceMock.searchOrganismPart(ImmutableSet.of()))
when(organismPartSearchServiceMock.search(ImmutableSet.of(), ImmutableSet.of()))
.thenReturn(ImmutableSet.of());

var emptyOrganismPartSet = subject.getOrganismPartBySearchTerm(requestParams);
Expand All @@ -216,7 +222,7 @@ void whenSearchTermIsFoundButNoRelatedCellIdsThenOrganismPartSearchReturnsEmptyS
.thenReturn(geneQuery);
when(geneIdSearchServiceMock.search(geneQuery))
.thenReturn(Optional.of(geneIdsFromService));
when(analyticsSearchServiceMock.searchOrganismPart(geneIdsFromService))
when(organismPartSearchServiceMock.search(geneIdsFromService, ImmutableSet.of()))
.thenReturn(ImmutableSet.of());

var emptyOrganismPartSet = subject.getOrganismPartBySearchTerm(requestParams);
Expand All @@ -239,7 +245,7 @@ void whenSearchTermIsFoundAndThereAreRelatedCellIdsThenReturnsOrganismParts() {
.thenReturn(geneQuery);
when(geneIdSearchServiceMock.search(geneQuery))
.thenReturn(Optional.of(geneIdsFromService));
when(analyticsSearchServiceMock.searchOrganismPart(geneIdsFromService))
when(organismPartSearchServiceMock.search(geneIdsFromService, ImmutableSet.of()))
.thenReturn(ImmutableSet.of(expectedOrganismPart));

var actualOrganismParts = subject.getOrganismPartBySearchTerm(requestParams);
Expand Down Expand Up @@ -335,17 +341,6 @@ void whenRequestParamIsEmptySpeciesSearchReturnsAnException() {
.isThrownBy(() -> subject.getSpeciesByGeneId(requestParams));
}

@Test
void whenRequestParamIsNullSpeciesSearchReturnsAnException() {
LinkedMultiValueMap<String, String> requestParams = null;

when(geneIdSearchServiceMock.getCategoryFromRequestParams(requestParams))
.thenThrow(new QueryParsingException("Error parsing query"));

assertThatExceptionOfType(QueryParsingException.class)
.isThrownBy(() -> subject.getSpeciesByGeneId(requestParams));
}

@Test
void whenGeneIdIsNotPartOfAnyExperimentThenReturnsEmptySetOfSpecies() {
var requestParams = new LinkedMultiValueMap<String, String>();
Expand Down
Loading