Skip to content

Commit

Permalink
Merge pull request #896 from conveyal/revise-simpson-desert
Browse files Browse the repository at this point in the history
Revise Simpson Desert tests
  • Loading branch information
abyrd authored Nov 2, 2023
2 parents 6741964 + c7f69ab commit 7970690
Show file tree
Hide file tree
Showing 16 changed files with 254 additions and 171 deletions.
11 changes: 7 additions & 4 deletions src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_SERVICE_HERE;
import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_WAIT_ALL_STOPS;
import static com.conveyal.r5.common.Util.isNullOrEmpty;
import static com.conveyal.r5.profile.PerTargetPropagater.MM_PER_METER;

/**
Expand Down Expand Up @@ -77,19 +78,21 @@ public OneOriginResult computeTravelTimes() {

// Find the set of destinations for a travel time calculation, not yet linked to the street network, and with
// no associated opportunities. By finding the extents and destinations up front, we ensure the exact same
// destination pointset is used for all steps below.
// destination PointSet is used for all steps below.
// This reuses the logic for finding the appropriate grid size and linking, which is now in the NetworkPreloader.
// We could change the preloader to retain these values in a compound return type, to avoid repetition here.
PointSet destinations;

if (request instanceof RegionalTask
&& !request.makeTauiSite
&& request.destinationPointSets[0] instanceof FreeFormPointSet
) {
// Freeform; destination pointset was set by handleOneRequest in the main AnalystWorker
// Freeform destinations. Destination PointSet was set by handleOneRequest in the main AnalystWorker.
destinations = request.destinationPointSets[0];
} else if (!isNullOrEmpty(request.destinationPointSets)) {
LOG.warn("ONLY VALID IN TESTING: Using PointSet object embedded in request outside regional analysis.");
destinations = request.destinationPointSets[0];
} else {
// Gridded (non-freeform) destinations. The extents are found differently in regional and single requests.
// Gridded (non-freeform) destinations. This method finds them differently for regional and single requests.
WebMercatorExtents destinationGridExtents = request.getWebMercatorExtents();
// Make a WebMercatorGridPointSet with the right extents, referring to the network's base grid and linkage.
destinations = AnalysisWorkerTask.gridPointSetCache.get(destinationGridExtents, network.fullExtentGridPointSet);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ of travel time (e.g. waiting) and paths should also be returned.
REGIONAL_ANALYSIS
}

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public AnalysisWorkerTask clone () {
// no need to catch CloneNotSupportedException, it's caught in ProfileRequest::clone
return (AnalysisWorkerTask) super.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public WebMercatorExtents getWebMercatorExtents() {
}
}

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public RegionalTask clone () {
return (RegionalTask) super.clone();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
package com.conveyal.r5.analyst.cluster;

import com.conveyal.r5.analyst.FreeFormPointSet;
import com.conveyal.r5.analyst.PointSet;
import com.conveyal.r5.analyst.WebMercatorExtents;
import com.conveyal.r5.profile.ProfileRequest;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.invoke.MethodHandles;

import static com.conveyal.r5.common.Util.isNullOrEmpty;
import static com.google.common.base.Preconditions.checkState;

/**
* Instances are serialized and sent from the backend to workers processing single point,
Expand All @@ -13,6 +23,8 @@
*/
public class TravelTimeSurfaceTask extends AnalysisWorkerTask {

private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

// FIXME red flag - what is this enum enumerating Java types?

@Override
Expand Down Expand Up @@ -48,8 +60,22 @@ public WebMercatorExtents getWebMercatorExtents() {

@Override
public int nTargetsPerOrigin () {
// In TravelTimeSurfaceTasks, the set of destinations is always determined by the web mercator extents.
return width * height;
// In TravelTimeSurfaceTasks, the set of destinations is always determined by the web mercator extents in the
// request. A single WebMercatorGridPointSet is created with those extents. A single small freeform PointSet may
// also be present, but only in testing situations. The checkState assertions serve to verify assumptions that
// destinationPointSets are always set and we can polymorphically fetch the number of items for all normal and
// testing use cases. Once that is clearly working the assertions could be removed.
checkState(!isNullOrEmpty(destinationPointSets), "Expected destination PointSets to be present.");
checkState(destinationPointSets.length == 1, "Expected a single destination PointSet in TravelTimeSurfaceTask.");
PointSet destinations = destinationPointSets[0];
int nFeatures = destinations.featureCount();
if (destinations instanceof FreeFormPointSet) {
LOG.warn("Should currently be used only in testing: FreeFormPointSet specified in TravelTimeSurfaceTask.");
checkState(nFeatures == 1);
} else {
checkState(nFeatures == width * height);
}
return nFeatures;
}

}
12 changes: 4 additions & 8 deletions src/main/java/com/conveyal/r5/profile/FastRaptorWorker.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,6 @@ public class FastRaptorWorker {
*/
public static final int UNREACHED = Integer.MAX_VALUE;

/**
* Minimum time between alighting from one vehicle and boarding another, in seconds.
* TODO make this configurable, and use loop-transfers from transfers.txt.
*/
public static final int BOARD_SLACK_SECONDS = 60;

public static final int SECONDS_PER_MINUTE = 60;

/**
Expand All @@ -70,8 +64,10 @@ public class FastRaptorWorker {
private static final int DEPARTURE_STEP_SEC = 60;

/**
* Minimum wait for boarding to account for schedule variation.
* FIXME clarify why this is separate from BOARD_SLACK. If it is not, merge the two constants into BOARD_SLACK_SEC.
* To be considered for boarding, a vehicle must depart at least this long after the rider arrives at the stop.
* Intuitively, "leave this long for transfers" to account for schedule variation or other unexpected variations.
* This is separate from BOARD_SLACK_SECONDS used in McRaptor and point-to-point searches, in case someone needs
* to set them independently.
*/
private static final int MINIMUM_BOARD_WAIT_SEC = 60;

Expand Down
12 changes: 10 additions & 2 deletions src/main/java/com/conveyal/r5/profile/PathWithTimes.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
* A path that also includes itineraries and bounds for all possible trips on the paths (even those which may not be optimal)
*/
public class PathWithTimes extends Path {

/**
* Minimum time between alighting from one vehicle and boarding another, in seconds.
* Appears to be used only in McRaptor and point-to-point searches.
* TODO make this configurable, and use loop-transfers from transfers.txt.
*/
public static final int BOARD_SLACK_SECONDS = 60;

/** Stats for the entire path */
public Stats stats;

Expand Down Expand Up @@ -92,7 +100,7 @@ private void computeTimes (TransportNetwork network, ProfileRequest req, TIntInt
// loop over departures within the time window
// firstTrip is the trip on the first pattern
int firstTrip = 0;
while (times[0][firstTrip][0] < req.fromTime + accessTime + FastRaptorWorker.BOARD_SLACK_SECONDS) firstTrip++;
while (times[0][firstTrip][0] < req.fromTime + accessTime + BOARD_SLACK_SECONDS) firstTrip++;

// now interleave times
double walkSpeedMillimetersPerSecond = req.walkSpeed * 1000;
Expand Down Expand Up @@ -137,7 +145,7 @@ private void computeTimes (TransportNetwork network, ProfileRequest req, TIntInt

// TODO should board slack be applied at the origin stop? Is this done in RaptorWorker?
// See also below in computeStatistics
time = times[patIdx][trip][1] + transferTime + FastRaptorWorker.BOARD_SLACK_SECONDS;
time = times[patIdx][trip][1] + transferTime + BOARD_SLACK_SECONDS;
itin.arriveAtBoardStopTimes[patIdx + 1] = time;
}
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/conveyal/r5/profile/ProfileRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,12 @@ public class ProfileRequest implements Serializable, Cloneable {
*/
public int monteCarloDraws = 220;

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public ProfileRequest clone () {
try {
return (ProfileRequest) super.clone();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/r5/profile/StatsCalculator.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public static StatsCollection computeStatistics (ProfileRequest req, int accessT

for (int start = req.fromTime; start < req.toTime; start += 60) {
// TODO should board slack be applied at the origin stop? Is this done in RaptorWorker?
int timeAtOriginStop = start + accessTime + FastRaptorWorker.BOARD_SLACK_SECONDS;
int timeAtOriginStop = start + accessTime + PathWithTimes.BOARD_SLACK_SECONDS;
int bestTimeAtDestinationStop = Integer.MAX_VALUE;

PathWithTimes.Itinerary bestItinerary = null;
Expand Down
81 changes: 77 additions & 4 deletions src/test/java/com/conveyal/r5/analyst/network/Distribution.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package com.conveyal.r5.analyst.network;

import com.conveyal.r5.analyst.cluster.TravelTimeResult;
import com.google.common.base.Preconditions;

import java.util.Arrays;

import static java.lang.Math.pow;
import static java.lang.Math.sqrt;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
Expand Down Expand Up @@ -90,6 +93,10 @@ public void normalize () {
}
}

/**
* Print a text-based representation of the distribution to standard out.
* There is another method to show the distribution in a graphical plot window.
*/
public void illustrate () {
final int width = 50;
double max = Arrays.stream(masses).max().getAsDouble();
Expand All @@ -102,6 +109,12 @@ public void illustrate () {
}
}

/**
* Given a percentile such as 25 or 50, find the x bin at which that percentile is situated in this Distribution,
* i.e. the lowest (binned or discretized) x value for which the cumulative probability is at least percentile.
* In common usage: find the lowest whole-minute travel time for which the cumulative probability is greater than
* the supplied percentile.
*/
public int findPercentile (int percentile) {
double sum = 0;
double threshold = percentile / 100d;
Expand All @@ -123,6 +136,10 @@ public static void main (String[] args) {
out.illustrate();
}

/**
* @return the probability mass situated at a particular x value (the probability density for a particular minute
* when these are used in the usual way as 1-minute bins).
*/
public double probabilityOf (int x) {
if (x < skip) {
return 0;
Expand Down Expand Up @@ -200,10 +217,18 @@ public static Distribution fromTravelTimeResult (TravelTimeResult travelTimeResu
}

/**
* Find the probability mass of the overlapping region of the two distributions. The amount of "misplaced"
* probability is one minus overlap. Overlap is slightly more straightforward to calculate directly than mismatch.
* Find the probability mass of the overlapping region of the two distributions. This can be used to determine
* whether two distributions, often a theoretical one and an observed one, are sufficiently similar to one another.
* Overlapping here means in both dimensions, travel time (horizontal) and probability density (vertical).
* Proceeding bin by bin through both distributions in parallel, the smaller of the two values for each bin is
* accumulated into the total. The amount of "misplaced" probability (located in the wrong bin in the observed
* distribution relative to the theoretical one) is one minus overlap. Overlap is slightly more straightforward
* to calculate directly than mismatch. This method is not sensitive to how evenly the error is distributed
* across the domain. We should prefer using a measure that emphasizes larger errors and compensates for the
* magnitude of the predicted values.
*/
public double overlap (Distribution other) {
// TODO This min is not necessary. The overlap is by definition fully within the domain of either Distribution.
int iMin = Math.min(this.skip, other.skip);
int iMax = Math.max(this.fullWidth(), other.fullWidth());
double sum = 0;
Expand All @@ -216,12 +241,45 @@ public double overlap (Distribution other) {
return sum;
}

/**
* An ad-hoc measure of goodness of fit vaguely related to Pearson's chi-squared or root-mean-square error.
* Traditional measures used in fitting probability distributions like Pearson's have properties that deal poorly
* with our need to tolerate small horizontal shifts
* in the results (due to the destination grid being not precisely aligned with our street corner grid).
* Another way to deal with this would be to ensure there is no horizontal shift, by measuring travel times at
* exactly the right places instead of on a grid.
*/
public double weightedSquaredError (Distribution other) {
double sum = 0;
// This is kind of ugly because we're only examining bins with nonzero probability (to avoid div by zero).
// Observed data in a region with predicted zero probability should be an automatic fail for the model.
for (int i = this.skip; i < this.fullWidth(); i++) {
double pe = this.probabilityOf(i);
double po = other.probabilityOf(i);
Preconditions.checkState(pe >= 0); // Ensure non-negative for good measure.
if (pe == 0) {
System.out.println("Zero (expected probability; skipping.");
continue;
}
// Errors are measured relative to the expected values, and stronger deviations emphasized by squaring.
// Measuring relative to expected density compensates for the case where it is spread over a wider domain.
sum += pow(po - pe, 2) / pe;
}
System.out.println("Squared error: " + sum);
System.out.println("Root Squared error: " + sqrt(sum));
return sum;
}

public void assertSimilar (Distribution observed) {
double squaredError = this.weightedSquaredError(observed);
showChartsIfEnabled(observed);
assertTrue(squaredError < 0.025, String.format("Error metric too high at at %3f", squaredError));
}

public void showChartsIfEnabled (Distribution observed) {
if (SHOW_CHARTS) {
DistributionChart.showChart(this, observed);
}
double overlapPercent = this.overlap(observed) * 100;
assertTrue(overlapPercent >= 95, String.format("Overlap less than 95%% at %3f", overlapPercent));
}

// This is ugly, it should be done some other way e.g. firstNonzero
Expand Down Expand Up @@ -249,4 +307,19 @@ public void trim () {
}
masses = Arrays.copyOfRange(masses, firstNonzero, lastNonzero + 1);
}

/**
* Here we are performing two related checks for a bit of redundancy and to check different parts of the system:
* checking percentiles drawn from the observed distribution, as well as the full histogram of the distribution.
* This double comparison could be done automatically with a method like Distribution.assertSimilar(TravelTimeResult).
* @param destination the flattened 1D index into the pointset, which will be zero for single freeform points.
*/
public void multiAssertSimilar(TravelTimeResult travelTimes, int destination) {
// Check a goodness-of-fit metric on the observed distribution relative to this distribution.
Distribution observed = Distribution.fromTravelTimeResult(travelTimes, destination);
this.assertSimilar(observed);
// Check that percentiles extracted from observed are similar to those predicted by this distribution.
int[] travelTimePercentiles = travelTimes.getTarget(destination);
DistributionTester.assertExpectedDistribution(this, travelTimePercentiles);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,13 @@ public JFreeChart createChart (Distribution... distributions) {
return chart;
}

// Note that the points are placed at the minute boundary, though the numbers represent densities over one minute.
// They should probably be represented as filled bars across the minute or as points midway across the minute.
private static TimeSeriesCollection createTimeSeriesDataset (Distribution... distributions) {
TimeSeriesCollection dataset = new TimeSeriesCollection();
int d = 0;
for (Distribution distribution : distributions) {
TimeSeries ts = new TimeSeries("X");
TimeSeries ts = new TimeSeries("Distribution " + (d++));
for (int i = distribution.skip(); i < distribution.fullWidth(); i++) {
double p = distribution.probabilityOf(i);
ts.add(new Minute(i, 0, 1, 1, 2000), p);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ public static void assertUniformlyDistributed (int[] sortedPercentiles, int min,
}
}

/**
* Given an expected distribution of travel times at a destination and the standard five percentiles of travel time
* at that same destination as computed by our router, check that the computed values seem to be drawn from the
* theoretically correct distribution.
*/
public static void assertExpectedDistribution (Distribution expectedDistribution, int[] values) {
for (int p = 0; p < PERCENTILES.length; p++) {
int expected = expectedDistribution.findPercentile(PERCENTILES[p]);
Expand Down
Loading

0 comments on commit 7970690

Please sign in to comment.