From a89cd5081ac5d78784ceea795f434537d334dc60 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?K=C3=A1roly=20Erd=C5=91s?= <2183410+ke4@users.noreply.github.com> Date: Tue, 26 Nov 2024 18:01:16 +0000 Subject: [PATCH] Fix ordering of the cluster labels in the marker gene heat map (#488) --- .../JsonMarkerGenesController.java | 18 +++--- .../markergenes/ClusterNameComparator.java | 23 ++++++++ .../markergenes/HighchartsHeatmapAdapter.java | 42 ++++++-------- ...eneComparatorByCellGroupValueByMarker.java | 55 ------------------- ...ComparatorByCellGroupValueWhereMarker.java | 13 +++++ ...perimentCellTypeMarkerGenesController.java | 5 +- .../HighchartsHeatmapAdapterTest.java | 16 +++--- ...imentCellTypeMarkerGenesControllerWIT.java | 1 - 8 files changed, 71 insertions(+), 102 deletions(-) create mode 100644 app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/ClusterNameComparator.java delete mode 100644 app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueByMarker.java create mode 100644 app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueWhereMarker.java diff --git a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/JsonMarkerGenesController.java b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/JsonMarkerGenesController.java index a14230625..805f9f179 100644 --- a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/JsonMarkerGenesController.java +++ b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/JsonMarkerGenesController.java @@ -30,27 +30,27 @@ public JsonMarkerGenesController(HighchartsHeatmapAdapter highchartsHeatmapAdapt public String getClusterMarkerGenes(@PathVariable String experimentAccession, @RequestParam String k) { return GSON.toJson( - highchartsHeatmapAdapter.getMarkerGeneHeatmapDataSortedNaturally( - markerGeneService.getMarkerGenesPerCluster(experimentAccession, k) - )); + highchartsHeatmapAdapter.getSortedMarkerGeneData( + markerGeneService.getMarkerGenesPerCluster(experimentAccession, k) + )); } @GetMapping(value = "/json/experiments/{experimentAccession}/marker-genes/cell-types", produces = MediaType.APPLICATION_JSON_UTF8_VALUE) public String getCellTypeMarkerGenes(@PathVariable String experimentAccession, @RequestParam Set organismPart) { - return GSON.toJson(highchartsHeatmapAdapter.getMarkerGeneHeatmapDataSortedLexicographically( - markerGeneService.getCellTypeMarkerGeneProfile(experimentAccession, ImmutableSet.copyOf(organismPart)) + return GSON.toJson(highchartsHeatmapAdapter.getSortedMarkerGeneData( + markerGeneService.getCellTypeMarkerGeneProfile(experimentAccession, ImmutableSet.copyOf(organismPart)) )); } @GetMapping(value = "/json/experiments/{experimentAccession}/marker-genes-heatmap/cell-types", produces = MediaType.APPLICATION_JSON_UTF8_VALUE) public String getCellTypeMarkerGenesHeatmapData(@PathVariable String experimentAccession, - @RequestParam String cellGroupType) { - return GSON.toJson(highchartsHeatmapAdapter.getMarkerGeneHeatmapDataSortedLexicographically( + @RequestParam String cellGroupType) { + return GSON.toJson( + highchartsHeatmapAdapter.getSortedMarkerGeneData( markerGeneService.getCellTypeMarkerGeneHeatmapData(experimentAccession, cellGroupType) - )); + )); } - } diff --git a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/ClusterNameComparator.java b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/ClusterNameComparator.java new file mode 100644 index 000000000..36d596921 --- /dev/null +++ b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/ClusterNameComparator.java @@ -0,0 +1,23 @@ +package uk.ac.ebi.atlas.experimentpage.markergenes; + +import java.text.Collator; +import java.util.Comparator; +import java.util.Locale; + +class ClusterNameComparator implements Comparator { + + @Override + public int compare(String o1, String o2) { + final String regexForNumeric = "^[0-9]*$"; + + if (o1.matches(regexForNumeric) && o2.matches(regexForNumeric)) { + return Integer.compare(Integer.parseInt(o1), Integer.parseInt(o2)); + } else if (o1.matches(regexForNumeric)) { + return 1; + } else if (o2.matches(regexForNumeric)) { + return -1; + } else { + return Collator.getInstance(Locale.US).compare(o1, o2); + } + } +} diff --git a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapter.java b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapter.java index 7d0e1f195..31ff29fcd 100644 --- a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapter.java +++ b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapter.java @@ -12,7 +12,6 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; -import static java.util.Comparator.comparing; import static java.util.stream.Collectors.groupingBy; @Component @@ -20,27 +19,20 @@ public class HighchartsHeatmapAdapter { private static final Function> MARKER_GENE_ID_TO_CELL_GROUP_VALUE_WHERE_MARKER = markerGene -> Pair.of(markerGene.geneId(), markerGene.cellGroupValueWhereMarker()); - private static final Comparator CELL_GROUP_VALUE_WHERE_MARKER_LEXICOGRAPHICAL = - comparing(MarkerGene::cellGroupValueWhereMarker).thenComparing(MarkerGene::pValue); - private static final Comparator MARKER_NATURALLY_ORDERED_BY_CELL_GROUP_VALUE = - new MarkerGeneComparatorByCellGroupValueByMarker(); + new MarkerGeneComparatorByCellGroupValueWhereMarker(); private static final Comparator MARKER_GENE_COMPARATOR = MARKER_NATURALLY_ORDERED_BY_CELL_GROUP_VALUE.thenComparing(MarkerGene::pValue); + private static final Comparator CLUSTER_NAME_COMPARATOR = new ClusterNameComparator(); + private final BioEntityPropertyDao bioEntityPropertyDao; public HighchartsHeatmapAdapter(BioEntityPropertyDao bioEntityPropertyDao) { this.bioEntityPropertyDao = bioEntityPropertyDao; } - public ImmutableList> getMarkerGeneHeatmapDataSortedNaturally - (Collection markerGenes) { - - return getMarkerGeneHeatmapData(markerGenes, MARKER_GENE_COMPARATOR); - } - /** * Given a list of marker genes, this method returns a Highcharts data series object * (heatmap data for series), where gene IDs/symbols are @@ -49,16 +41,21 @@ public HighchartsHeatmapAdapter(BioEntityPropertyDao bioEntityPropertyDao) { * The rows of the heatmap are ordered by the cell type, i.e. genes for celltype 1, 2, etc. * If there are no marker genes for a cell group, then no rows will be present in the data. */ - public ImmutableList> getMarkerGeneHeatmapDataSortedLexicographically( - Collection markerGenes) { + public ImmutableList> getSortedMarkerGeneData + (Collection markerGenes) { + var sortedMarkerGenes = getSortedMarkerGenes(markerGenes); + + var rows = getRowsFromSortedMarkerGenes(sortedMarkerGenes); + + var columns = getColumnsFromSortedMarkerGenes(sortedMarkerGenes); - return getMarkerGeneHeatmapData(markerGenes, CELL_GROUP_VALUE_WHERE_MARKER_LEXICOGRAPHICAL); + return getMarkerGeneHeatmapData(sortedMarkerGenes, rows, columns); } - private ImmutableList getSortedMarkerGenes(Collection markerGenes, Comparator markerGeneComparator) { + private ImmutableList getSortedMarkerGenes(Collection markerGenes) { return mergeSameGeneIdIntoSingleGroup(markerGenes).stream() .parallel() - .sorted(markerGeneComparator) + .sorted(MARKER_GENE_COMPARATOR) .collect(toImmutableList()); } @@ -73,18 +70,13 @@ private static ImmutableList getColumnsFromSortedMarkerGenes(ImmutableLi return sortedMarkerGenes.stream() .map(MarkerGene::cellGroupValue) .distinct() - .sorted() + .sorted(CLUSTER_NAME_COMPARATOR) .collect(toImmutableList()); } - private ImmutableList> getMarkerGeneHeatmapData(Collection markerGenes, - Comparator markerGeneComparator) { - var sortedMarkerGenes = getSortedMarkerGenes(markerGenes, markerGeneComparator); - - var rows = getRowsFromSortedMarkerGenes(sortedMarkerGenes); - - var columns = getColumnsFromSortedMarkerGenes(sortedMarkerGenes); - + private ImmutableList> getMarkerGeneHeatmapData(Collection sortedMarkerGenes, + ImmutableList> rows, + ImmutableList columns) { var symbolsForGeneIds = bioEntityPropertyDao.getSymbolsForGeneIds( sortedMarkerGenes.stream().map(MarkerGene::geneId).collect(toImmutableSet())); diff --git a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueByMarker.java b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueByMarker.java deleted file mode 100644 index f483c84a0..000000000 --- a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueByMarker.java +++ /dev/null @@ -1,55 +0,0 @@ -package uk.ac.ebi.atlas.experimentpage.markergenes; - -import java.util.Comparator; - -class MarkerGeneComparatorByCellGroupValueByMarker implements Comparator { - - public int compare(MarkerGene marker1, MarkerGene marker2) { - var marker1CellGroupValue = marker1.cellGroupValueWhereMarker(); - var marker2CellGroupValue = marker2.cellGroupValueWhereMarker(); - - int len1 = marker1CellGroupValue.length(); - int len2 = marker2CellGroupValue.length(); - int i = 0; - while (markerValueNotEnded(i, len1, len2)) { - char c1 = marker1CellGroupValue.charAt(i); - char c2 = marker2CellGroupValue.charAt(i); - if (isBothNonNumeric(c1, c2)) { - // Non-numeric characters, compare as usual - if (c1 != c2) { - return c1 - c2; - } - } else if (isNonNumeric(c1)) { - // marker1CellGroupValue has non-numeric prefix, marker2CellGroupValue is numeric - return -1; - } else if (isNonNumeric(c2)) { - // marker2CellGroupValue has non-numeric prefix, marker1CellGroupValue is numeric - return 1; - } else { - // Both strings have numeric parts, compare as integers - return compareAsNumeric(marker1CellGroupValue, marker2CellGroupValue, i); - } - i++; - } - // Handle strings with different lengths - return Integer.compare(len1, len2); - } - - private static boolean markerValueNotEnded(int i, int len1, int len2) { - return i < len1 && i < len2; - } - - private static boolean isNonNumeric(char c1) { - return !Character.isDigit(c1); - } - - private static boolean isBothNonNumeric(char c1, char c2) { - return isNonNumeric(c1) && isNonNumeric(c2); - } - - private static int compareAsNumeric(String marker1CellGroupValue, String marker2CellGroupValue, int i) { - int num1 = Integer.parseInt(marker1CellGroupValue.substring(i)); - int num2 = Integer.parseInt(marker2CellGroupValue.substring(i)); - return Integer.compare(num1, num2); - } -} diff --git a/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueWhereMarker.java b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueWhereMarker.java new file mode 100644 index 000000000..97fcd76ae --- /dev/null +++ b/app/src/main/java/uk/ac/ebi/atlas/experimentpage/markergenes/MarkerGeneComparatorByCellGroupValueWhereMarker.java @@ -0,0 +1,13 @@ +package uk.ac.ebi.atlas.experimentpage.markergenes; + +import java.util.Comparator; + +class MarkerGeneComparatorByCellGroupValueWhereMarker implements Comparator { + + public int compare(MarkerGene marker1, MarkerGene marker2) { + var clusterNameComparator = new ClusterNameComparator(); + + return clusterNameComparator.compare( + marker1.cellGroupValueWhereMarker(),marker2.cellGroupValueWhereMarker()); + } +} diff --git a/app/src/main/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesController.java b/app/src/main/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesController.java index 64a3aed76..dcbbed0c0 100644 --- a/app/src/main/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesController.java +++ b/app/src/main/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesController.java @@ -1,7 +1,6 @@ package uk.ac.ebi.atlas.search.metadata; import com.google.common.collect.ImmutableSet; -import org.jetbrains.annotations.NotNull; import org.springframework.http.MediaType; import org.springframework.web.bind.annotation.GetMapping; import org.springframework.web.bind.annotation.PathVariable; @@ -10,8 +9,6 @@ import uk.ac.ebi.atlas.controllers.JsonExceptionHandlingController; import uk.ac.ebi.atlas.experimentpage.markergenes.HighchartsHeatmapAdapter; -import java.net.URLDecoder; -import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Base64; import java.util.Collection; @@ -36,7 +33,7 @@ public String getCellTypeMarkerGenes( @PathVariable String cellType, @RequestParam(name = "experiment-accessions", required = false) Collection experimentAccessions) { return GSON.toJson( - highchartsHeatmapAdapter.getMarkerGeneHeatmapDataSortedLexicographically( + highchartsHeatmapAdapter.getSortedMarkerGeneData( experimentAccessions == null ? multiexperimentCellTypeMarkerGenesService.getCellTypeMarkerGeneProfile( getDecodedCellType(cellType)) : diff --git a/app/src/test/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapterTest.java b/app/src/test/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapterTest.java index 9f952d212..d038aac55 100644 --- a/app/src/test/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapterTest.java +++ b/app/src/test/java/uk/ac/ebi/atlas/experimentpage/markergenes/HighchartsHeatmapAdapterTest.java @@ -55,7 +55,7 @@ void markerGeneWithoutSymbolHasGeneIDAsName() { MarkerGene.create(randomGeneIds.get(2), "1", "5", 0.001, "6", 1000, 10000, EXPRESSION_UNIT)); - var result = subject.getMarkerGeneHeatmapDataSortedNaturally(markerGenes); + var result = subject.getSortedMarkerGeneData(markerGenes); assertThat(result).hasSize(3); assertThat(result).element(0).extracting("geneName").containsOnly(randomGeneSymbols.get(0)); @@ -85,7 +85,7 @@ void cellTypeMarkerGeneWithoutSymbolHasGeneIDAsName() { "T cell", 0.001, "B cell", 1000, 10000, EXPRESSION_UNIT)); - var result = subject.getMarkerGeneHeatmapDataSortedLexicographically(cellTypeMarkerGenes); + var result = subject.getSortedMarkerGeneData(cellTypeMarkerGenes); assertThat(result).hasSize(3); assertThat(result).element(0).extracting("geneName").containsOnly(randomGeneSymbols.get(0)); @@ -131,15 +131,15 @@ void mergesMultipleGeneIdsAcrossGroupsIntoOneRowWithLowestPValue() { when(bioEntityPropertyDaoMock.getSymbolsForGeneIds(ImmutableSet.of(geneId))) .thenReturn(ImmutableMap.of(geneId, geneId)); - assertThat(subject.getMarkerGeneHeatmapDataSortedLexicographically(ImmutableSet.of(markerGene1, markerGene2))) + assertThat(subject.getSortedMarkerGeneData(ImmutableSet.of(markerGene1, markerGene2))) .extracting("y") .containsOnly(0); - assertThat(subject.getMarkerGeneHeatmapDataSortedNaturally(ImmutableSet.of(markerGene1, markerGene2))) + assertThat(subject.getSortedMarkerGeneData(ImmutableSet.of(markerGene1, markerGene2))) .extracting("y") .containsOnly(0); - assertThat(subject.getMarkerGeneHeatmapDataSortedNaturally(ImmutableSet.of(markerGene1, markerGene2))) + assertThat(subject.getSortedMarkerGeneData(ImmutableSet.of(markerGene1, markerGene2))) .extracting("cellGroupValueWhereMarker") .containsOnly(cellGroupValueWhereMarker2); } @@ -167,7 +167,7 @@ void whenMarkerGeneHasNumericalClusterNames_theySortedCorrectly() { MarkerGene.create(randomGeneIds.get(2), "1", cellGroupValueWhereMarkers[2], 0.001, "6", 1000, 10000, EXPRESSION_UNIT)); - var result = subject.getMarkerGeneHeatmapDataSortedNaturally(markerGenes); + var result = subject.getSortedMarkerGeneData(markerGenes); assertThat(result).hasSize(3); assertThat(result).element(0).extracting("cellGroupValueWhereMarker") @@ -204,7 +204,7 @@ void whenMarkerGeneHasNotOnlyNumericalClusterNames_theySortedCorrectly() { MarkerGene.create(randomGeneIds.get(3), "1", cellGroupValueWhereMarkers[3], 0.001, "6", 1000, 10000, EXPRESSION_UNIT)); - var result = subject.getMarkerGeneHeatmapDataSortedNaturally(markerGenes); + var result = subject.getSortedMarkerGeneData(markerGenes); assertThat(result).hasSize(4); assertThat(result).element(0).extracting("cellGroupValueWhereMarker") @@ -243,7 +243,7 @@ void whenMarkerGeneHasOnlyStringClusterNames_theySortedCorrectly() { MarkerGene.create(randomGeneIds.get(3), "1", cellGroupValueWhereMarkers[3], 0.001, "6", 1000, 10000, EXPRESSION_UNIT)); - var result = subject.getMarkerGeneHeatmapDataSortedNaturally(markerGenes); + var result = subject.getSortedMarkerGeneData(markerGenes); assertThat(result).hasSize(4); assertThat(result).element(0).extracting("cellGroupValueWhereMarker") diff --git a/app/src/test/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesControllerWIT.java b/app/src/test/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesControllerWIT.java index d3d788eac..4729bdbbc 100644 --- a/app/src/test/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesControllerWIT.java +++ b/app/src/test/java/uk/ac/ebi/atlas/search/metadata/JsonMultiexperimentCellTypeMarkerGenesControllerWIT.java @@ -14,7 +14,6 @@ import org.springframework.web.context.WebApplicationContext; import uk.ac.ebi.atlas.configuration.TestConfig; -import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Base64;