Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the intermittently failing tests in SCXA #491

Merged
merged 13 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public ImmutableSet<String> fetchedRandomCellTypesByCellIDs(ImmutableSet<String>
SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> queryBuilder = new SolrQueryBuilder<>();
queryBuilder
.addQueryFieldByTerm(CELL_ID, cellIDs)
.exists(CTW_CELL_TYPE)
.setFieldList(CTW_CELL_TYPE)
.setRows(MAX_ROWS);

Expand All @@ -44,6 +45,7 @@ public ImmutableSet<String> fetchedRandomOrganismPartsByCellIDs(ImmutableSet<Str
SolrQueryBuilder<SingleCellAnalyticsCollectionProxy> queryBuilder = new SolrQueryBuilder<>();
queryBuilder
.addQueryFieldByTerm(CELL_ID, cellIDs)
.exists(CTW_CELL_TYPE)
.setFieldList(CTW_ORGANISM_PART)
.setRows(MAX_ROWS);

Expand All @@ -59,7 +61,7 @@ private ImmutableSet<String> getRandomCellTypesFromQueryResult(
String schemaFieldName,
int numberOfCellTypes) {
return Arrays.stream(new Random().ints(numberOfCellTypes, 0, solrDocumentList.size()).toArray())
.mapToObj(index -> solrDocumentList.get(index).getFieldValue(schemaFieldName).toString())
.collect(toImmutableSet());
.mapToObj(index -> solrDocumentList.get(index).getFieldValue(schemaFieldName).toString())
.collect(toImmutableSet());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import javax.inject.Inject;
import javax.sql.DataSource;
import java.util.Map;

import static org.hamcrest.CoreMatchers.not;
import static org.hamcrest.Matchers.contains;
Expand Down Expand Up @@ -129,9 +130,11 @@ void unexpressedGene() throws Exception {

@Test
void validJsonForValidGeneId() throws Exception {
var geneId = jdbcTestUtils.fetchRandomGene();
var params = generateGeneSearchParams();
var species = params.get("species");
var geneId = params.get("geneId");

this.mockMvc.perform(get("/json/gene-search").param("ensgene", geneId))
this.mockMvc.perform(get("/json/gene-search").param("ensgene", geneId).param("species", species))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$.results", hasSize(greaterThanOrEqualTo(1))))
Expand All @@ -143,31 +146,10 @@ void validJsonForValidGeneId() throws Exception {

@Test
void ifSymbolQueryMatchesUniqueGeneIdIncludeIt() throws Exception {
var geneId = jdbcTestUtils.fetchRandomGene();

// Some gene IDs don’t have a symbol, e.g. ERCC-00044
// Also, it turns out that some gene symbols like Vmn1r216 match more than one gene ID within the same species:
// ENSMUSG00000115697 and ENSMUSG00000116057
// We don’t want any of those pesky gene IDs!
var matchingSymbols = bioEntityPropertyDao.fetchPropertyValuesForGeneId(geneId, SYMBOL);
while (matchingSymbols.isEmpty() ||
bioEntityPropertyDao.fetchGeneIdsForPropertyValue(
SYMBOL, matchingSymbols.iterator().next()).size() > 1) {
geneId = jdbcTestUtils.fetchRandomGene();
matchingSymbols = bioEntityPropertyDao.fetchPropertyValuesForGeneId(geneId, SYMBOL);
}

var solrQueryBuilder =
new SolrQueryBuilder<BioentitiesCollectionProxy>()
.addQueryFieldByTerm(BIOENTITY_IDENTIFIER, geneId)
.addQueryFieldByTerm(PROPERTY_NAME, "symbol")
.setFieldList(PROPERTY_VALUE)
.setFieldList(SPECIES)
.setRows(1);

var docList = bioentitiesCollectionProxy.query(solrQueryBuilder).getResults();
var symbol = docList.get(0).getFieldValue(PROPERTY_VALUE.name()).toString();
var species = docList.get(0).getFieldValue(SPECIES.name()).toString();
var params = generateGeneSearchParams();
var symbol = params.get("symbol");
var species = params.get("species");
var geneId = params.get("geneId");

this.mockMvc.perform(get("/json/gene-search").param("symbol", symbol).param("species", species))
.andExpect(status().isOk())
Expand All @@ -178,18 +160,6 @@ void ifSymbolQueryMatchesUniqueGeneIdIncludeIt() throws Exception {
.andExpect(jsonPath("$.matchingGeneId", equalTo("(" + geneId + ")")));
}

@Test
void jsonPayloadContainsFacetDescription() throws Exception {
var geneId = jdbcTestUtils.fetchRandomGene();

this.mockMvc.perform(get("/json/gene-search").param("ensgene", geneId))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$.results", hasSize(greaterThanOrEqualTo(1))))
.andExpect(jsonPath("$.results[0].experimentAccession", isA(String.class)))
.andExpect(jsonPath("$.checkboxFacetGroups", contains("Marker genes", "Species")));
}

@Test
void speciesParamCanAppearBeforeGeneQuery() throws Exception {
this.mockMvc.perform(get("/json/gene-search").param("species", "homo sapiens").param("symbol", "aspm"))
Expand Down Expand Up @@ -358,4 +328,33 @@ void whenGivenExistingSymbolReturnedCellTypesNotContainsNullValue() throws Excep
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", not(contains(IsNull.nullValue()))));
}

private Map<String, String> generateGeneSearchParams() {
var geneId = jdbcTestUtils.fetchRandomGene();
var matchingSymbols = bioEntityPropertyDao.fetchPropertyValuesForGeneId(geneId, SYMBOL);
while (matchingSymbols.isEmpty() ||
bioEntityPropertyDao.fetchGeneIdsForPropertyValue(
SYMBOL, matchingSymbols.iterator().next()).size() > 1) {
geneId = jdbcTestUtils.fetchRandomGene();
matchingSymbols = bioEntityPropertyDao.fetchPropertyValuesForGeneId(geneId, SYMBOL);
}

var solrQueryBuilder =
new SolrQueryBuilder<BioentitiesCollectionProxy>()
.addQueryFieldByTerm(BIOENTITY_IDENTIFIER, geneId)
.addQueryFieldByTerm(PROPERTY_NAME, "symbol")
.setFieldList(PROPERTY_VALUE)
.setFieldList(SPECIES)
.setRows(1);

var docList = bioentitiesCollectionProxy.query(solrQueryBuilder).getResults();
var symbol = docList.get(0).getFieldValue(PROPERTY_VALUE.name()).toString();
var species = docList.get(0).getFieldValue(SPECIES.name()).toString();

return Map.ofEntries(
Map.entry("geneId", geneId),
Map.entry("symbol", symbol),
Map.entry("species", species)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.TestInstance;
import org.junit.jupiter.api.extension.ExtendWith;
Expand All @@ -23,13 +24,12 @@
import uk.ac.ebi.atlas.solr.cloud.collections.BioentitiesCollectionProxy;
import uk.ac.ebi.atlas.solr.cloud.search.SolrQueryBuilder;
import uk.ac.ebi.atlas.testutils.JdbcUtils;
import uk.ac.ebi.atlas.testutils.RandomDataTestUtils;

import javax.inject.Inject;
import javax.sql.DataSource;
import java.util.Map;

import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThanOrEqualTo;
import static org.hamcrest.Matchers.hasSize;
Expand Down Expand Up @@ -82,7 +82,7 @@ void populateDatabaseTables() {
new ClassPathResource("fixtures/scxa_cell_group_marker_gene_stats.sql")
);

populator.execute(dataSource);
populator.execute(dataSource);
}

@AfterAll
Expand Down Expand Up @@ -189,12 +189,15 @@ void ifSymbolQueryMatchesUniqueGeneIdIncludeIt() throws Exception {
.andExpect(jsonPath("$.matchingGeneId", equalTo("(" + shouldBeMarkerGene + ")")));
}

@Test
// It is ignored as it is hard to make this work,
ke4 marked this conversation as resolved.
Show resolved Hide resolved
// and hopefully it is going to be deprecated soon
@Disabled
void jsonPayloadContainsFacetDescription() throws Exception {
var shouldBeMarkerGene =
jdbcTestUtils.fetchRandomMarkerGeneFromSingleCellExperiment("E-CURD-4");
var params = generateGeneSearchParams("E-CURD-4");
var species = params.get("species");
var geneId = params.get("geneId");

this.mockMvc.perform(get("/json/search").param("ensgene", shouldBeMarkerGene))
this.mockMvc.perform(get("/json/search").param("ensgene", geneId).param("species", species))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$.results", hasSize(greaterThanOrEqualTo(1))))
Expand All @@ -207,156 +210,37 @@ void jsonPayloadContainsFacetDescription() throws Exception {
.andExpect(jsonPath("$.checkboxFacetGroups", contains("Marker genes", "Species")));
}

@Test
ke4 marked this conversation as resolved.
Show resolved Hide resolved
void speciesParamCanAppearBeforeGeneQuery() throws Exception {
this.mockMvc.perform(get("/json/search").param("species", "homo sapiens").param("symbol", "aspm"))
.andExpect(status().isOk());
}

@Test
void whenSearchForAMarkerGeneWithEmptyValueReturnsError() throws Exception {
final String emptyGeneSearchParams = "";
final String expectedMessage = "{\"error\":\"Error parsing query\"}\n";
this.mockMvc.perform(get("/json/gene-search/marker-genes").param("q", emptyGeneSearchParams))
.andExpect(status().isBadRequest())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(content().string(expectedMessage));
}

@Test
void whenGeneIsAMarkerGeneSearchForItReturnsTrue() throws Exception {
var shouldBeMarkerGene =
jdbcTestUtils.fetchRandomMarkerGeneFromSingleCellExperiment("E-CURD-4");

this.mockMvc.perform(get("/json/gene-search/marker-genes").param("ensgene", shouldBeMarkerGene))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(content().string("true"));
}

@Test
void whenGeneIsNotAMarkerGeneSearchForItReturnsFalse() throws Exception {
var notAMarkerGene = RandomDataTestUtils.generateRandomEnsemblGeneId();
private Map<String, String> generateGeneSearchParams(String accessionId) {
var geneId = jdbcTestUtils.fetchRandomMarkerGeneFromSingleCellExperiment(accessionId);

this.mockMvc.perform(get("/json/gene-search/marker-genes").param("ensgene", notAMarkerGene))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(content().string("false"));
}

@Test
void whenSearchForSpeciesWithEmptyValueReturnsError() throws Exception {
final String emptySpeciesSearchParams = "";
final String expectedMessage = "{\"error\":\"Error parsing query\"}\n";
this.mockMvc.perform(get("/json/gene-search/species").param("q", emptySpeciesSearchParams))
.andExpect(status().isBadRequest())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(content().string(expectedMessage));
}

@Test
void whenGeneIdIsPartOfSomeExperimentsThenReturnSetOfSpecies() throws Exception {
var shouldBeGeneThatPartOfExperiments =
jdbcTestUtils.fetchRandomGeneFromSingleCellExperiment("E-CURD-4");

var expectedSpecies = "Arabidopsis_thaliana";

this.mockMvc.perform(get("/json/gene-search/species").param("ensgene", shouldBeGeneThatPartOfExperiments))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", hasSize(equalTo(1))))
.andExpect(jsonPath("$", containsInAnyOrder(expectedSpecies)));
}

@Test
void whenGeneIdIsNotPartOfAnyExperimentsThenReturnEmptySetOfSpecies() throws Exception {
// This is the SolR streaming expression query that is getting the list of bioentity_identifiers
// that is not part of any experiments based on a given species (as a query parameter in the bioentities query)
// I just selected the 1st ID and used that in my test
// If we are going to use this query more than once, we might have to implement this in a utility method
// Currently the `complement` streaming expression is not implemented in our code, yet, so it is a bigger effort to do it
// complement(
// search(bioentities-v1, q=species:solanum_lycopersicum, fl="bioentity_identifier_dv",
// sort="bioentity_identifier_dv asc", qt="/export"),
// select(
// search(scxa-gene2experiment-v1, q=experiment_accession:E-ENAD-53, fl="bioentity_identifier",
// sort="bioentity_identifier asc", qt="/export"),
// bioentity_identifier as bioentity_identifier_dv),
// on="bioentity_identifier_dv"
// )

var geneNotPartOfAnyExperiments = "ENSRNA049444660";

this.mockMvc.perform(get("/json/gene-search/species").param("ensgene", geneNotPartOfAnyExperiments))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", hasSize(equalTo(0))));
}

@Test
void whenSearchForOrganismPartWithEmptyValueReturnsError() throws Exception {
final String emptyOrganismPartSearchTerm = "";
final String expectedMessage = "{\"error\":\"Error parsing query\"}\n";
this.mockMvc.perform(get("/json/gene-search/organism-parts").param("q", emptyOrganismPartSearchTerm))
.andExpect(status().isBadRequest())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(content().string(expectedMessage));
}

@Test
void whenSearchTermNotExistsInDBThenOrganismPartSearchReturnsEmptySet() throws Exception {
var geneNotPartOfAnyExperiments = "ENSRNA049444660";

this.mockMvc.perform(get("/json/gene-search/organism-parts").param("ensgene", geneNotPartOfAnyExperiments))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", hasSize(equalTo(0))));
}

@Test
void whenSearchTermExistsInDBThenReturnsSetOfOrganismParts() throws Exception {
var shouldBeGeneThatPartOfExperiments =
jdbcTestUtils.fetchRandomGeneFromSingleCellExperiment("E-CURD-4");

var expectedOrganismParts = "root";

this.mockMvc.perform(get("/json/gene-search/organism-parts").param("ensgene", shouldBeGeneThatPartOfExperiments))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", hasSize(equalTo(1))))
.andExpect(jsonPath("$", containsInAnyOrder(expectedOrganismParts)));
}

@Test
void whenSearchForCellTypesWithEmptyValueReturnsError() throws Exception {
final String emptyCellTypeSearchTerm = "";
final String expectedMessage = "{\"error\":\"Error parsing query\"}\n";
this.mockMvc.perform(get("/json/gene-search/cell-types").param("q", emptyCellTypeSearchTerm))
.andExpect(status().isBadRequest())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(content().string(expectedMessage));
}

@Test
void whenSearchTermNotExistsInDBThenCellTypeSearchReturnsEmptySet() throws Exception {
var geneNotPartOfAnyExperiments = "ENSRNA049444660";
// Some gene IDs don’t have a symbol, e.g. ERCC-00044
// Also, it turns out that some gene symbols like Vmn1r216 match more than one gene ID within the same species:
// ENSMUSG00000115697 and ENSMUSG00000116057
// We don’t want any of those pesky gene IDs!
var matchingSymbols = bioEntityPropertyDao.fetchPropertyValuesForGeneId(geneId, SYMBOL);
while (matchingSymbols.isEmpty() ||
bioEntityPropertyDao.fetchGeneIdsForPropertyValue(
SYMBOL, matchingSymbols.iterator().next()).size() > 1) {
geneId = jdbcTestUtils.fetchRandomMarkerGeneFromSingleCellExperiment(accessionId);
matchingSymbols = bioEntityPropertyDao.fetchPropertyValuesForGeneId(geneId, SYMBOL);
}

this.mockMvc.perform(get("/json/gene-search/cell-types").param("ensgene", geneNotPartOfAnyExperiments))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", hasSize(equalTo(0))));
}
var solrQueryBuilder =
new SolrQueryBuilder<BioentitiesCollectionProxy>()
.addQueryFieldByTerm(BIOENTITY_IDENTIFIER, geneId)
.addQueryFieldByTerm(PROPERTY_NAME, "symbol")
.setFieldList(PROPERTY_VALUE)
.setFieldList(SPECIES)
.setRows(1);

@Test
void whenSearchTermExistsInDBThenReturnsSetOfCellType() throws Exception {
var shouldBeGeneThatPartOfExperiments =
jdbcTestUtils.fetchRandomGeneFromSingleCellExperiment(
jdbcTestUtils.fetchRandomExperimentAccession()
);
var docList = bioentitiesCollectionProxy.query(solrQueryBuilder).getResults();
var symbol = docList.get(0).getFieldValue(PROPERTY_VALUE.name()).toString();
var species = docList.get(0).getFieldValue(SPECIES.name()).toString();

this.mockMvc.perform(get("/json/gene-search/cell-types").param("ensgene", shouldBeGeneThatPartOfExperiments))
.andExpect(status().isOk())
.andExpect(content().contentType(MediaType.APPLICATION_JSON_UTF8))
.andExpect(jsonPath("$", hasSize(greaterThanOrEqualTo(1))));
return Map.ofEntries(
Map.entry("geneId", geneId),
Map.entry("symbol", symbol),
Map.entry("species", species)
);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

import javax.inject.Inject;
import javax.sql.DataSource;

import java.util.HashSet;

import static org.assertj.core.api.Assertions.assertThat;
Expand Down Expand Up @@ -183,8 +182,8 @@ void whenValidCellIdsAndValidProvidedReturnSetOfCellTypes() {
var randomListOfCellIDs =
ImmutableSet.copyOf(
new HashSet<>(jdbcUtils.fetchRandomListOfCells(3)));
ImmutableSet<String> organismParts = solrUtils.fetchedRandomOrganismPartsByCellIDs(
randomListOfCellIDs, 1);
var organismParts = solrUtils.fetchedRandomOrganismPartsByCellIDs(
randomListOfCellIDs, 1);

var cellTypes = subject.searchCellTypes(randomListOfCellIDs, organismParts);

Expand Down
Loading