From 1d97d49603d6c88efaac61b52eeb881d5b94bfba Mon Sep 17 00:00:00 2001 From: Andrew Byrd Date: Fri, 2 Feb 2024 16:26:36 +0800 Subject: [PATCH] list of detailed itinerary entries under summary there is still no selected-link label on the detailed iterations/paths. They are just ordered immediately below the summary including them. --- .../r5/analyst/cluster/PathResult.java | 265 +++++++++++------- .../java/com/conveyal/r5/common/Util.java | 22 ++ 2 files changed, 179 insertions(+), 108 deletions(-) diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java index bc60772bd..1e5bef0cc 100644 --- a/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java +++ b/src/main/java/com/conveyal/r5/analyst/cluster/PathResult.java @@ -7,6 +7,7 @@ import com.conveyal.r5.transit.path.RouteSequence; import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; +import com.google.common.collect.Multimaps; import gnu.trove.list.TIntList; import gnu.trove.list.array.TIntArrayList; import gnu.trove.set.TIntSet; @@ -25,6 +26,7 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static com.conveyal.r5.common.Util.newFixedSizeList; import static com.google.common.base.Preconditions.checkState; /** @@ -65,10 +67,10 @@ public class PathResult { private int[] nUnfilteredIterationsReachingDestination; /** - * In case the scenario includes multiple select-link modifications, we store separate groups of patterns - * for each selected link, at each destination. + * In case the scenario includes multiple select-link modifications, for each selected link we store separate + * groups of iterations at each destination. */ - private List>[] iterationsForEachSelectedLink; + private List[]> iterationsForDestinationForSelectedLink; private final TransitLayer transitLayer; @@ -101,152 +103,199 @@ public PathResult(AnalysisWorkerTask task, TransitLayer transitLayer) { // Only allocate these large arrays when select-link modifications are present. if (transitLayer.parentNetwork.selectedLinks != null) { nUnfilteredIterationsReachingDestination = new int[nDestinations]; - iterationsForEachSelectedLink = new ArrayList[nDestinations]; + iterationsForDestinationForSelectedLink = newFixedSizeList( + transitLayer.parentNetwork.selectedLinks.size(), + () -> new Multimap[nDestinations] + ); } this.transitLayer = transitLayer; } /** * Populate the multimap of path templates to iterations, reducing by using route-based keys instead of - * pattern-based keys + * pattern-based keys. + * TODO maybe we should be converting this to its final CSV form in a streaming manner instead of storing it. */ public void setTarget(int targetIndex, Multimap patterns) { // When selected link analysis is enabled, filter down the PatternSequence-Iteration Multimap to retain only // those keys passing through the selected links. // Maybe selectedLink instance should be on TransitLayer not TransportNetwork. - if (transitLayer.parentNetwork.selectedLinks != null) { + final var selectedLinks = transitLayer.parentNetwork.selectedLinks; + if (selectedLinks != null) { // The size of a multimap is the number of mappings (number of values), not number of unique keys. // This size method appears to be O(1), see: com.google.common.collect.AbstractMapBasedMultimap.size nUnfilteredIterationsReachingDestination[targetIndex] = patterns.size(); - // Unfortunate Java "effectively final" silliness, but older iteration style gets ugly here too. - final var finalPatterns = patterns; - iterationsForEachSelectedLink[targetIndex] = transitLayer.parentNetwork.selectedLinks.stream() - .map(selectedLink -> selectedLink.filterPatterns(finalPatterns)) - .map(this::collapsePatternsToRoutes) - .collect(Collectors.toList()); + for (int i = 0; i < selectedLinks.size(); i++) { + var selectedLink = selectedLinks.get(i); + var iterationsForDestination = iterationsForDestinationForSelectedLink.get(i); + iterationsForDestination[targetIndex] = collapsePatternsToRoutes(selectedLink.filterPatterns(patterns)); + } // Combining all the iterations from many select-links is tricky or nonsensical. They represent different // locations with different proportions of iterations passing through them, possibly grouped differently // into routes. It's probably best to list the non-summed iterations separately for each select-link. } else { - // Standard case: no SelectedLink filtered down the patterns-iterations map. These maps will be left - // null if the filtering did happen, to clearly distinguish between the two cases. + // Standard case: no SelectedLink filtered down the patterns-iterations map. The elements of this array + // will be left null if the filtering did happen, clearly distinguishing between the two cases. iterationsForPathTemplates[targetIndex] = collapsePatternsToRoutes(patterns); } } - // Cannot be static as it uses the transitlayer field to look up routes. + /** + * Given a map from PatternSequences to the Iterations that used those sequences of patterns, merge the map + * entries according to which route the patterns belong to. + * The only thing preventing this method from being static is that it uses the transitLayer to look up route names. + */ private Multimap collapsePatternsToRoutes ( - Multimap patterns + Multimap patternIterations ) { - final Multimap routes = HashMultimap.create(); - patterns.forEach(((patternSeq, iteration) -> routes.put(new RouteSequence(patternSeq, transitLayer), iteration))); - return routes; + // This could probably be replaced with ImmutableListMultimap.toImmutableListMultimap collector. + final Multimap routeIterations = HashMultimap.create(); + patternIterations.forEach(((patternSeq, iteration) -> + routeIterations.put(new RouteSequence(patternSeq, transitLayer), iteration)) + ); + return routeIterations; } /** * Summary of iterations for each destination, suitable for writing to a CSV. Conversion to strings happens here * (on distributed workers) to minimize pressure on the central Broker's assembler. + * TODO this would probably benefit from factoring out a bunch of named record types (return and parameter types). * * @param stat whether the reported per-leg wait times should correspond to the iteration with the minimum or mean * total waiting time. * @return For each destination, a list of String[]s summarizing path templates. For each path template, details * of the itinerary with waiting time closest to the requested stat are included. */ - public ArrayList[] summarizeIterations(Stat stat) { - ArrayList[] summary = new ArrayList[nDestinations]; + public ArrayList[] summarizeIterations (Stat stat) { + ArrayList[] csvRowsForDestinations = new ArrayList[nDestinations]; + final var selectedLinks = transitLayer.parentNetwork.selectedLinks; + // For each destination, build up a list of CSV rows representing iteration paths that reached that destination. + // Iteration order (over destinations, then selected links) is dictated by distributed computation. Workers + // are working on one origin at a time, so while we might prefer the final file to be ordered by selected link, + // without a final sort of the entire file the selected links would end up interleaved inside each origin. for (int d = 0; d < nDestinations; d++) { - // Hold all the summary CSV rows for this particular destination - summary[d] = new ArrayList<>(); - // SelectedLink case: collapse all RouteSequences and Iterations for this OD pair into one to simplify. - // iterationMap is empty (not null) for destinations that were reached without using the selected link. - // This could also be done by merging all Iterations under a single RouteSequence with all route IDs. - // Looks like array may be non-null but with null values if transit search fails (no origin linkage). - if (iterationsForEachSelectedLink != null && iterationsForEachSelectedLink[d] != null) { - // TODO transpose to list of arrays instead of array of lists, factor selectedLink iteration outside this method call - int n = 0; // TODO handle this list-zip better (Streams.zip()) - for (var patternsForSelectedLink : iterationsForEachSelectedLink[d]) { - String selectedLinkLabel = transitLayer.parentNetwork.selectedLinks.get(n++).label; - int nIterations = 0; - TIntSet allRouteIds = new TIntHashSet(); - double summedTotalTime = 0; - for (RouteSequence routeSequence : patternsForSelectedLink.keySet()) { - Collection iterations = patternsForSelectedLink.get(routeSequence); - nIterations += iterations.size(); - allRouteIds.addAll(routeSequence.routes); - summedTotalTime += iterations.stream().mapToInt(i -> i.totalTime).sum(); - } - // Many destinations will have no iterations at all passing through the SelectedLink area. - // Skip those to keep the CSV output short (and to avoid division by zero below). - if (nIterations == 0) { - continue; + if (selectedLinks != null) { + ArrayList csvRows = new ArrayList<>(); + for (int s = 0; s < selectedLinks.size(); s++) { + var selectedLink = selectedLinks.get(s); + var iterationsMap = iterationsForDestinationForSelectedLink.get(s)[d]; + var summaryRow = iterationsToSummaryCsvRow(iterationsMap, selectedLink.label, d); + if (summaryRow != null) { + csvRows.add(summaryRow); + csvRows.addAll(iterationsToCsvRows(iterationsMap, d, stat)); } - String[] row = new String[DATA_COLUMNS.length]; - Arrays.fill(row, "ALL"); - transitLayer.routeString(1, true); - String allRouteIdsPipeSeparated = Arrays.stream(allRouteIds.toArray()) - // If includeName is set to false we record only the ID without the name. - // Name works better than ID for routes added by modifications, which have random IDs. - .mapToObj(ri -> transitLayer.routeString(ri, true)) - .collect(Collectors.joining("|")); - String iterationProportion = "%.3f".formatted( - nIterations / (double) (nUnfilteredIterationsReachingDestination[d])); - row[0] = selectedLinkLabel + ": " + allRouteIdsPipeSeparated; - row[row.length - 1] = iterationProportion; - // Report average of total time over all retained iterations, different than mean/min approach below. - row[row.length - 2] = String.format("%.1f", summedTotalTime / nIterations / 60d); - summary[d].add(row); - // TODO call the standard case below, so the summary row is followed by its component parts. - // We could optionally continue to the next outer for loop iteration here, to return only the summary row. } + csvRowsForDestinations[d] = csvRows; + } else { + csvRowsForDestinations[d] = iterationsToCsvRows(iterationsForPathTemplates[d], d, stat); } - // Standard (non SelectedLink) case. - Multimap iterationMap = iterationsForPathTemplates[d]; - // It looks like this could just be: if (iterationmap == null) continue; (to clarify without indentation) - // When some destinations are reached, any unreached ones are empty maps rather than nulls. They will be - // null if no search happens at all (origin is not connected to network) in which case maybe we should skip - // this step. This makes and stores an array of CSV rows as arrayLists, one for each destination. - if (iterationMap != null) { - for (RouteSequence routeSequence: iterationMap.keySet()) { - Collection iterations = iterationMap.get(routeSequence); - int nIterations = iterations.size(); - checkState(nIterations > 0, "A path was stored without any iterations"); - String waits = null, transfer = null, totalTime = null; - String[] path = routeSequence.detailsWithGtfsIds(transitLayer, true); - double targetValue; - IntStream totalWaits = iterations.stream().mapToInt(i -> i.waitTimes.sum()); - if (stat == Stat.MINIMUM) { - targetValue = totalWaits.min().orElse(-1); - } else if (stat == Stat.MEAN){ - targetValue = totalWaits.average().orElse(-1); - } else { - throw new RuntimeException("Unrecognized statistic for path summary"); - } - double score = Double.MAX_VALUE; - for (Iteration iteration: iterations) { - // TODO clean up, maybe re-using approaches from PathScore? - double thisScore = Math.abs(targetValue - iteration.waitTimes.sum()); - if (thisScore < score) { - StringJoiner waitTimes = new StringJoiner("|"); - iteration.waitTimes.forEach(w -> { - waitTimes.add(String.format("%.1f", w / 60f)); - return true; - }); - waits = waitTimes.toString(); - transfer = String.format("%.1f", routeSequence.stopSequence.transferTime(iteration) / 60f); - totalTime = String.format("%.1f", iteration.totalTime / 60f); - if (thisScore == 0) break; - score = thisScore; - } - } - // Check above guarantees that nIterations is nonzero, so total iterations must be nonzero, - // avoiding divide by zero. - String iterationProportion = "%.3f".formatted( - nIterations / (double)(nUnfilteredIterationsReachingDestination[d])); - String[] row = ArrayUtils.addAll(path, transfer, waits, totalTime, iterationProportion); - checkState(row.length == DATA_COLUMNS.length); - summary[d].add(row); + } + return csvRowsForDestinations; + } + + /** + * SelectedLink case: collapse all RouteSequences and Iterations for this OD pair into one simple summary. + * patternIteration is empty (not null) for destinations that were reached without using the selected link. + * Looks like array may be non-null but with null values if transit search fails (no origin linkage). + * Returns null if there are no iterations here, indicating that for this selected link, this destination need not + * be included in the CSV output. + * The only thing preventing this method from being static is that it uses the transitLayer to look up route names. + */ + private String[] iterationsToSummaryCsvRow ( + Multimap patternIterations, String selectedLinkLabel, int destinationIndex + ) { + // If no transit search occurred, the iterations map may be null rather than empty. + // We could entirely skip the CSV writing in this case, but full (non-select-link) results may want + // output rows for every OD pair even when the origin did not use any transit. + if (patternIterations == null) { + return null; + } + int nIterations = 0; + TIntSet allRouteIds = new TIntHashSet(); + double summedTotalTime = 0; + for (RouteSequence routeSequence : patternIterations.keySet()) { + Collection iterations = patternIterations.get(routeSequence); + nIterations += iterations.size(); + allRouteIds.addAll(routeSequence.routes); + summedTotalTime += iterations.stream().mapToInt(i -> i.totalTime).sum(); + } + // Many destinations will have no iterations at all passing through the SelectedLink area. + // Skip those to keep the CSV output short (and to avoid division by zero below). + if (nIterations == 0) { + return null; + } + String[] row = new String[DATA_COLUMNS.length]; + Arrays.fill(row, "ALL"); + String allRouteIdsPipeSeparated = Arrays.stream(allRouteIds.toArray()) + // If includeName is set to false we record only the ID without the name. + // Name works better than ID for routes added by modifications, which have random IDs. + .mapToObj(ri -> transitLayer.routeString(ri, true)) + .collect(Collectors.joining("|")); + String iterationProportion = "%.3f".formatted( + nIterations / (double) (nUnfilteredIterationsReachingDestination[destinationIndex])); + row[0] = selectedLinkLabel + ": " + allRouteIdsPipeSeparated; + row[row.length - 1] = iterationProportion; + // Report average of total time over all retained iterations, different than mean/min approach below. + row[row.length - 2] = String.format("%.1f", summedTotalTime / nIterations / 60d); + return row; + } + + /** + * Standard (non SelectedLink) case. + * Factored out to allow repeated calls generating multiple sets of CSV rows when multiple select-links present. + * The only thing preventing this method from being static is that it uses the transitLayer to look up route names. + */ + private ArrayList iterationsToCsvRows ( + Multimap iterationMap, int destinationIndex, Stat stat + ) { + // Hold all the summary CSV rows for this particular destination. + ArrayList summary = new ArrayList<>(); + // When some destinations are reached, any unreached ones are empty maps rather than nulls. They will be + // null if no search happens at all (origin is not connected to network) in which case maybe we should skip + // this step. + if (iterationMap == null) { + return summary; // FIXME return type does not allow List.of() or EMPTY_LIST; + } + // Make and stores an array of CSV rows as arrayLists, one for each destination. + for (RouteSequence routeSequence: iterationMap.keySet()) { + Collection iterations = iterationMap.get(routeSequence); + int nIterations = iterations.size(); + checkState(nIterations > 0, "A path was stored without any iterations"); + String waits = null, transfer = null, totalTime = null; + String[] path = routeSequence.detailsWithGtfsIds(transitLayer, true); + double targetValue; + IntStream totalWaits = iterations.stream().mapToInt(i -> i.waitTimes.sum()); + if (stat == Stat.MINIMUM) { + targetValue = totalWaits.min().orElse(-1); + } else if (stat == Stat.MEAN){ + targetValue = totalWaits.average().orElse(-1); + } else { + throw new RuntimeException("Unrecognized statistic for path summary"); + } + double score = Double.MAX_VALUE; + for (Iteration iteration: iterations) { + // TODO clean up, maybe re-using approaches from PathScore? + double thisScore = Math.abs(targetValue - iteration.waitTimes.sum()); + if (thisScore < score) { + StringJoiner waitTimes = new StringJoiner("|"); + iteration.waitTimes.forEach(w -> { + waitTimes.add(String.format("%.1f", w / 60f)); + return true; + }); + waits = waitTimes.toString(); + transfer = String.format("%.1f", routeSequence.stopSequence.transferTime(iteration) / 60f); + totalTime = String.format("%.1f", iteration.totalTime / 60f); + if (thisScore == 0) break; + score = thisScore; } } + // Check above guarantees that nIterations is nonzero, so total iterations must be nonzero, + // avoiding divide by zero. + String iterationProportion = "%.3f".formatted( + nIterations / (double)(nUnfilteredIterationsReachingDestination[destinationIndex])); + String[] row = ArrayUtils.addAll(path, transfer, waits, totalTime, iterationProportion); + checkState(row.length == DATA_COLUMNS.length); + summary.add(row); } return summary; } diff --git a/src/main/java/com/conveyal/r5/common/Util.java b/src/main/java/com/conveyal/r5/common/Util.java index c79e1c168..e1ad56cbd 100644 --- a/src/main/java/com/conveyal/r5/common/Util.java +++ b/src/main/java/com/conveyal/r5/common/Util.java @@ -1,7 +1,10 @@ package com.conveyal.r5.common; +import com.amazonaws.retry.v2.OrRetryCondition; + import java.util.Arrays; import java.util.Collection; +import java.util.List; import java.util.Map; import java.util.function.Supplier; @@ -67,6 +70,24 @@ public static int[] newIntArray (int length, int defaultValue) { return array; } + /** + * Convenience method to create an immutable list filled with new instances of a class. + */ + public static List newFixedSizeList (int n, Supplier supplier) { + return Arrays.asList(newObjectArray(n, supplier)); + } + + /** + * Convenience method to create a one-dimenational array and fill it immediately with new instances of a class. + */ + public static T[] newObjectArray (int n, Supplier supplier) { + T[] result = (T[]) new Object[n]; + for (int i = 0; i < n; i++) { + result[i] = supplier.get(); + } + return result; + } + /** * Convenience method to create a 2D array and fill it immediately with new instances of a class. * The supplier can be a method reference to a constructor like ToBeInstantiated::new, and the returned @@ -75,6 +96,7 @@ public static int[] newIntArray (int length, int defaultValue) { public static T[][] newObjectArray (int d1, int d2, Supplier supplier) { T[][] result = (T[][]) new Object[d1][d2]; for (int x = 0; x < d1; x++) { + // Could be replaced with a call to 1D newObjectArray for (int y = 0; y < d2; y++) { result[x][y] = supplier.get(); }