conveyal · abyrd · Nov 2, 2023 · Oct 18, 2023 · Oct 18, 2023 · Oct 18, 2023
diff --git a/src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java b/src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java
@@ -7,6 +7,7 @@
 import com.conveyal.r5.analyst.fare.InRoutingFareCalculator;
 import com.conveyal.r5.analyst.scenario.PickupWaitTimes;
 import com.conveyal.r5.api.util.LegMode;
+import com.conveyal.r5.common.Util;
 import com.conveyal.r5.point_to_point.builder.PointToPointQuery;
 import com.conveyal.r5.profile.DominatingList;
 import com.conveyal.r5.profile.FareDominatingList;
@@ -30,6 +31,7 @@
 
 import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_SERVICE_HERE;
 import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_WAIT_ALL_STOPS;
+import static com.conveyal.r5.common.Util.isNullOrEmpty;
 import static com.conveyal.r5.profile.PerTargetPropagater.MM_PER_METER;
 
 /**
@@ -77,19 +79,21 @@ public OneOriginResult computeTravelTimes() {
 
         // Find the set of destinations for a travel time calculation, not yet linked to the street network, and with
         // no associated opportunities. By finding the extents and destinations up front, we ensure the exact same
-        // destination pointset is used for all steps below.
+        // destination PointSet is used for all steps below.
         // This reuses the logic for finding the appropriate grid size and linking, which is now in the NetworkPreloader.
         // We could change the preloader to retain these values in a compound return type, to avoid repetition here.
         PointSet destinations;
-
         if (request instanceof  RegionalTask
             && !request.makeTauiSite
             && request.destinationPointSets[0] instanceof FreeFormPointSet
         ) {
-            // Freeform; destination pointset was set by handleOneRequest in the main AnalystWorker
+            // Freeform destinations. Destination PointSet was set by handleOneRequest in the main AnalystWorker.
+            destinations = request.destinationPointSets[0];
+        } else if (!isNullOrEmpty(request.destinationPointSets)) {
+            LOG.warn("ONLY VALID IN TESTING: Using PointSet object embedded in request outside regional analysis.");
             destinations = request.destinationPointSets[0];
         } else {
-            // Gridded (non-freeform) destinations. The extents are found differently in regional and single requests.
+            // Gridded (non-freeform) destinations. This method finds them differently for regional and single requests.
             WebMercatorExtents destinationGridExtents = request.getWebMercatorExtents();
             // Make a WebMercatorGridPointSet with the right extents, referring to the network's base grid and linkage.
             destinations = AnalysisWorkerTask.gridPointSetCache.get(destinationGridExtents, network.fullExtentGridPointSet);

diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java b/src/main/java/com/conveyal/r5/analyst/cluster/AnalysisWorkerTask.java
@@ -214,6 +214,12 @@ of travel time (e.g. waiting) and paths should also be returned.
         REGIONAL_ANALYSIS
     }
 
+    /**
+     * WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
+     * fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
+     * Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
+     * them might be an even greater liability than carefully choosing how to use clone().
+     */
     public AnalysisWorkerTask clone () {
         // no need to catch CloneNotSupportedException, it's caught in ProfileRequest::clone
         return (AnalysisWorkerTask) super.clone();

diff --git a/src/main/java/com/conveyal/r5/analyst/cluster/RegionalTask.java b/src/main/java/com/conveyal/r5/analyst/cluster/RegionalTask.java
@@ -80,6 +80,12 @@ public WebMercatorExtents getWebMercatorExtents() {
         }
     }
 
+    /**
+     * WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
+     * fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
+     * Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
+     * them might be an even greater liability than carefully choosing how to use clone().
+     */
     public RegionalTask clone () {
         return (RegionalTask) super.clone();
     }

diff --git a/src/main/java/com/conveyal/r5/profile/ProfileRequest.java b/src/main/java/com/conveyal/r5/profile/ProfileRequest.java
@@ -240,6 +240,12 @@ public class ProfileRequest implements Serializable, Cloneable {
      */
     public int monteCarloDraws = 220;
 
+    /**
+     * WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
+     * fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
+     * Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
+     * them might be an even greater liability than carefully choosing how to use clone().
+     */
     public ProfileRequest clone () {
         try {
             return (ProfileRequest) super.clone();

diff --git a/src/test/java/com/conveyal/r5/analyst/network/Distribution.java b/src/test/java/com/conveyal/r5/analyst/network/Distribution.java
@@ -1,9 +1,12 @@
 package com.conveyal.r5.analyst.network;
 
 import com.conveyal.r5.analyst.cluster.TravelTimeResult;
+import com.google.common.base.Preconditions;
 
 import java.util.Arrays;
 
+import static java.lang.Math.pow;
+import static java.lang.Math.sqrt;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
@@ -90,6 +93,10 @@ public void normalize () {
         }
     }
 
+    /**
+     * Print a text-based representation of the distribution to standard out.
+     * There is another method to show the distribution in a graphical plot window.
+     */
     public void illustrate () {
         final int width = 50;
         double max = Arrays.stream(masses).max().getAsDouble();
@@ -102,6 +109,12 @@ public void illustrate () {
         }
     }
 
+    /**
+     * Given a percentile such as 25 or 50, find the x bin at which that percentile is situated in this Distribution,
+     * i.e. the lowest (binned or discretized) x value for which the cumulative probability is at least percentile.
+     * In common usage: find the lowest whole-minute travel time for which the cumulative probability is greater than
+     * the supplied percentile.
+     */
     public int findPercentile (int percentile) {
         double sum = 0;
         double threshold = percentile / 100d;
@@ -123,6 +136,10 @@ public static void main (String[] args) {
         out.illustrate();
     }
 
+    /**
+     * @return the probability mass situated at a particular x value (the probability density for a particular minute
+     *         when these are used in the usual way as 1-minute bins).
+     */
     public double probabilityOf (int x) {
         if (x < skip) {
             return 0;
@@ -200,10 +217,18 @@ public static Distribution fromTravelTimeResult (TravelTimeResult travelTimeResu
     }
 
     /**
-     * Find the probability mass of the overlapping region of the two distributions. The amount of "misplaced"
-     * probability is one minus overlap. Overlap is slightly more straightforward to calculate directly than mismatch.
+     * Find the probability mass of the overlapping region of the two distributions. This can be used to determine
+     * whether two distributions, often a theoretical one and an observed one, are sufficiently similar to one another.
+     * Overlapping here means in both dimensions, travel time (horizontal) and probability density (vertical).
+     * Proceeding bin by bin through both distributions in parallel, the smaller of the two values for each bin is
+     * accumulated into the total. The amount of "misplaced" probability (located in the wrong bin in the observed
+     * distribution relative to the theoretical one) is one minus overlap. Overlap is slightly more straightforward
+     * to calculate directly than mismatch. This method is not sensitive to how evenly the error is distributed
+     * across the domain. We should prefer using a measure that emphasizes larger errors and compensates for the
+     * magnitude of the predicted values.
      */
     public double overlap (Distribution other) {
+        // TODO This min is not necessary. The overlap is by definition fully within the domain of either Distribution.
         int iMin = Math.min(this.skip, other.skip);
         int iMax = Math.max(this.fullWidth(), other.fullWidth());
         double sum = 0;
@@ -216,12 +241,45 @@ public double overlap (Distribution other) {
         return sum;
     }
 
+    /**
+     * An ad-hoc measure of goodness of fit vaguely related to Pearson's chi-squared or root-mean-square error.
+     * Traditional measures used in fitting probability distributions like Pearson's have properties that deal poorly
+     * with our need to tolerate small horizontal shifts
+     * in the results (due to the destination grid being not precisely aligned with our street corner grid).
+     * Another way to deal with this would be to ensure there is no horizontal shift, by measuring travel times at
+     * exactly the right places instead of on a grid.
+     */
+    public double weightedSquaredError (Distribution other) {
+        double sum = 0;
+        // This is kind of ugly because we're only examining bins with nonzero probability (to avoid div by zero).
+        // Observed data in a region with predicted zero probability should be an automatic fail for the model.
+        for (int i = this.skip; i < this.fullWidth(); i++) {
+            double pe = this.probabilityOf(i);
+            double po = other.probabilityOf(i);
+            Preconditions.checkState(pe >= 0); // Ensure non-negative for good measure.
+            if (pe == 0) {
+                System.out.println("Zero (expected probability; skipping.");
+                continue;
+            }
+            // Errors are measured relative to the expected values, and stronger deviations emphasized by squaring.
+            // Measuring relative to expected density compensates for the case where it is spread over a wider domain.
+            sum += pow(po - pe, 2) / pe;
+        }
+        System.out.println("Squared error: " + sum);
+        System.out.println("Root Squared error: " + sqrt(sum));
+        return sum;
+    }
+
     public void assertSimilar (Distribution observed) {
+        double squaredError = this.weightedSquaredError(observed);
+        showChartsIfEnabled(observed);
+        assertTrue(squaredError < 0.025, String.format("Error metric too high at at %3f", squaredError));
+    }
+
+    public void showChartsIfEnabled (Distribution observed) {
         if (SHOW_CHARTS) {
             DistributionChart.showChart(this, observed);
         }
-        double overlapPercent = this.overlap(observed) * 100;
-        assertTrue(overlapPercent >= 95, String.format("Overlap less than 95%% at %3f", overlapPercent));
     }
 
     // This is ugly, it should be done some other way e.g. firstNonzero
@@ -249,4 +307,19 @@ public void trim () {
         }
         masses = Arrays.copyOfRange(masses, firstNonzero, lastNonzero + 1);
     }
+
+    /**
+     * Here we are performing two related checks for a bit of redundancy and to check different parts of the system:
+     * checking percentiles drawn from the observed distribution, as well as the full histogram of the distribution.
+     * This double comparison could be done automatically with a method like Distribution.assertSimilar(TravelTimeResult).
+     * @param destination the flattened 1D index into the pointset, which will be zero for single freeform points.
+     */
+    public void multiAssertSimilar(TravelTimeResult travelTimes, int destination) {
+        // Check a goodness-of-fit metric on the observed distribution relative to this distribution.
+        Distribution observed = Distribution.fromTravelTimeResult(travelTimes, destination);
+        this.assertSimilar(observed);
+        // Check that percentiles extracted from observed are similar to those predicted by this distribution.
+        int[] travelTimePercentiles = travelTimes.getTarget(destination);
+        DistributionTester.assertExpectedDistribution(this, travelTimePercentiles);
+    }
 }
diff --git a/src/test/java/com/conveyal/r5/analyst/network/DistributionChart.java b/src/test/java/com/conveyal/r5/analyst/network/DistributionChart.java
@@ -61,10 +61,13 @@ public JFreeChart createChart (Distribution... distributions) {
         return chart;
     }
 
+    // Note that the points are placed at the minute boundary, though the numbers represent densities over one minute.
+    // They should probably be represented as filled bars across the minute or as points midway across the minute.
     private static TimeSeriesCollection createTimeSeriesDataset (Distribution... distributions) {
         TimeSeriesCollection dataset = new TimeSeriesCollection();
+        int d = 0;
         for (Distribution distribution : distributions) {
-            TimeSeries ts = new TimeSeries("X");
+            TimeSeries ts = new TimeSeries("Distribution " + (d++));
             for (int i = distribution.skip(); i < distribution.fullWidth(); i++) {
                 double p = distribution.probabilityOf(i);
                 ts.add(new Minute(i, 0, 1, 1, 2000), p);

diff --git a/src/test/java/com/conveyal/r5/analyst/network/DistributionTester.java b/src/test/java/com/conveyal/r5/analyst/network/DistributionTester.java
@@ -24,6 +24,11 @@ public static void assertUniformlyDistributed (int[] sortedPercentiles, int min,
         }
     }
 
+    /**
+     * Given an expected distribution of travel times at a destination and the standard five percentiles of travel time
+     * at that same destination as computed by our router, check that the computed values seem to be drawn from the
+     * theoretically correct distribution.
+     */
     public static void assertExpectedDistribution (Distribution expectedDistribution, int[] values) {
         for (int p = 0; p < PERCENTILES.length; p++) {
             int expected = expectedDistribution.findPercentile(PERCENTILES[p]);

diff --git a/src/test/java/com/conveyal/r5/analyst/network/GridLayout.java b/src/test/java/com/conveyal/r5/analyst/network/GridLayout.java
@@ -169,47 +169,12 @@ public GridSinglePointTaskBuilder newTaskBuilder() {
         return new GridSinglePointTaskBuilder(this);
     }
 
-    public GridSinglePointTaskBuilder copyTask(AnalysisWorkerTask task) {
-        return new GridSinglePointTaskBuilder(this, task);
-    }
-
     /** Get the minimum envelope containing all the points in this grid. */
     public Envelope gridEnvelope () {
         Coordinate farCorner = getIntersectionLatLon(widthAndHeightInBlocks, widthAndHeightInBlocks);
         return new Envelope(originPoint.x, farCorner.x, originPoint.y, farCorner.y);
     }
 
-    /**
-     * Create a gridded pointset coextensive with this grid, with the given number of opportunities in each cell.
-     * TODO maybe derive this from the network's gridded pointset rather than the GridLayout.
-     */
-    public Grid makeUniformOpportunityDataset (double density) {
-        Grid grid = new Grid(DEFAULT_ZOOM, this.gridEnvelope());
-        for (double[] column : grid.grid) {
-            Arrays.fill(column, density);
-        }
-        return grid;
-    }
-
-    /**
-     * Create a gridded pointset coextensive with this grid, with the given number of opportunities in each cell in the
-     * eastern half of the grid.
-     */
-    public Grid makeRightHalfOpportunityDataset (double density) {
-        Grid grid = new Grid(DEFAULT_ZOOM, this.gridEnvelope());
-        for (int c = grid.grid.length / 2; c < grid.grid.length; c++) {
-            Arrays.fill(grid.grid[c], density);
-        }
-        return grid;
-    }
-
-    public int pointIndex(AnalysisWorkerTask task, int x, int y) {
-        Coordinate destLatLon = this.getIntersectionLatLon(x, y);
-        // Here is a bit of awkwardness where WebMercatorGridPointSet and Grid both extend PointSet, but don't share
-        // their grid referencing code, so one would have to be converted to the other to get the point index.
-        return new WebMercatorGridPointSet(task.getWebMercatorExtents()).getPointIndexContaining(destLatLon);
-    }
-
     public String nextIntegerId() {
         return Integer.toString(nextIntegerId++);
     }

diff --git a/src/test/java/com/conveyal/r5/analyst/network/GridRoute.java b/src/test/java/com/conveyal/r5/analyst/network/GridRoute.java
@@ -21,7 +21,7 @@ public class GridRoute {
     public Orientation orientation;
     public boolean bidirectional;
 
-    /** Explicit departure times from first stop; if set, startHour and endHour will be ignored*/
+    /** Explicit departure times from first stop; if set, startHour and endHour will be ignored. */
     public int[] startTimes;
 
     /** Override default hop times. Map of (trip, stopAtStartOfHop) to factor by which default hop is multiplied. */