Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise Simpson Desert tests #896

Merged
merged 9 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/main/java/com/conveyal/r5/analyst/TravelTimeComputer.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import com.conveyal.r5.analyst.fare.InRoutingFareCalculator;
import com.conveyal.r5.analyst.scenario.PickupWaitTimes;
import com.conveyal.r5.api.util.LegMode;
import com.conveyal.r5.common.Util;
abyrd marked this conversation as resolved.
Show resolved Hide resolved
import com.conveyal.r5.point_to_point.builder.PointToPointQuery;
import com.conveyal.r5.profile.DominatingList;
import com.conveyal.r5.profile.FareDominatingList;
Expand All @@ -30,6 +31,7 @@

import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_SERVICE_HERE;
import static com.conveyal.r5.analyst.scenario.PickupWaitTimes.NO_WAIT_ALL_STOPS;
import static com.conveyal.r5.common.Util.isNullOrEmpty;
import static com.conveyal.r5.profile.PerTargetPropagater.MM_PER_METER;

/**
Expand Down Expand Up @@ -77,19 +79,21 @@ public OneOriginResult computeTravelTimes() {

// Find the set of destinations for a travel time calculation, not yet linked to the street network, and with
// no associated opportunities. By finding the extents and destinations up front, we ensure the exact same
// destination pointset is used for all steps below.
// destination PointSet is used for all steps below.
// This reuses the logic for finding the appropriate grid size and linking, which is now in the NetworkPreloader.
// We could change the preloader to retain these values in a compound return type, to avoid repetition here.
PointSet destinations;

if (request instanceof RegionalTask
&& !request.makeTauiSite
&& request.destinationPointSets[0] instanceof FreeFormPointSet
) {
// Freeform; destination pointset was set by handleOneRequest in the main AnalystWorker
// Freeform destinations. Destination PointSet was set by handleOneRequest in the main AnalystWorker.
destinations = request.destinationPointSets[0];
} else if (!isNullOrEmpty(request.destinationPointSets)) {
LOG.warn("ONLY VALID IN TESTING: Using PointSet object embedded in request outside regional analysis.");
destinations = request.destinationPointSets[0];
} else {
// Gridded (non-freeform) destinations. The extents are found differently in regional and single requests.
// Gridded (non-freeform) destinations. This method finds them differently for regional and single requests.
WebMercatorExtents destinationGridExtents = request.getWebMercatorExtents();
// Make a WebMercatorGridPointSet with the right extents, referring to the network's base grid and linkage.
destinations = AnalysisWorkerTask.gridPointSetCache.get(destinationGridExtents, network.fullExtentGridPointSet);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,12 @@ of travel time (e.g. waiting) and paths should also be returned.
REGIONAL_ANALYSIS
}

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public AnalysisWorkerTask clone () {
// no need to catch CloneNotSupportedException, it's caught in ProfileRequest::clone
return (AnalysisWorkerTask) super.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ public WebMercatorExtents getWebMercatorExtents() {
}
}

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public RegionalTask clone () {
return (RegionalTask) super.clone();
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/conveyal/r5/profile/ProfileRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,12 @@ public class ProfileRequest implements Serializable, Cloneable {
*/
public int monteCarloDraws = 220;

/**
* WARNING This whole tree of classes contains non-primitive compound fields. Cloning WILL NOT DEEP COPY these
* fields. Modifying some aspects of the cloned object may modify the same aspects of the one it was cloned from.
* Unfortunately these classes have a large number of fields and maintaining hand written copy constructors for
* them might be an even greater liability than carefully choosing how to use clone().
*/
public ProfileRequest clone () {
try {
return (ProfileRequest) super.clone();
Expand Down
81 changes: 77 additions & 4 deletions src/test/java/com/conveyal/r5/analyst/network/Distribution.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
package com.conveyal.r5.analyst.network;

import com.conveyal.r5.analyst.cluster.TravelTimeResult;
import com.google.common.base.Preconditions;

import java.util.Arrays;

import static java.lang.Math.pow;
import static java.lang.Math.sqrt;
import static org.junit.jupiter.api.Assertions.assertTrue;

/**
Expand Down Expand Up @@ -90,6 +93,10 @@ public void normalize () {
}
}

/**
* Print a text-based representation of the distribution to standard out.
* There is another method to show the distribution in a graphical plot window.
*/
public void illustrate () {
final int width = 50;
double max = Arrays.stream(masses).max().getAsDouble();
Expand All @@ -102,6 +109,12 @@ public void illustrate () {
}
}

/**
* Given a percentile such as 25 or 50, find the x bin at which that percentile is situated in this Distribution,
* i.e. the lowest (binned or discretized) x value for which the cumulative probability is at least percentile.
* In common usage: find the lowest whole-minute travel time for which the cumulative probability is greater than
* the supplied percentile.
*/
public int findPercentile (int percentile) {
double sum = 0;
double threshold = percentile / 100d;
Expand All @@ -123,6 +136,10 @@ public static void main (String[] args) {
out.illustrate();
}

/**
* @return the probability mass situated at a particular x value (the probability density for a particular minute
* when these are used in the usual way as 1-minute bins).
*/
public double probabilityOf (int x) {
if (x < skip) {
return 0;
Expand Down Expand Up @@ -200,10 +217,18 @@ public static Distribution fromTravelTimeResult (TravelTimeResult travelTimeResu
}

/**
* Find the probability mass of the overlapping region of the two distributions. The amount of "misplaced"
* probability is one minus overlap. Overlap is slightly more straightforward to calculate directly than mismatch.
* Find the probability mass of the overlapping region of the two distributions. This can be used to determine
* whether two distributions, often a theoretical one and an observed one, are sufficiently similar to one another.
* Overlapping here means in both dimensions, travel time (horizontal) and probability density (vertical).
* Proceeding bin by bin through both distributions in parallel, the smaller of the two values for each bin is
* accumulated into the total. The amount of "misplaced" probability (located in the wrong bin in the observed
* distribution relative to the theoretical one) is one minus overlap. Overlap is slightly more straightforward
* to calculate directly than mismatch. This method is not sensitive to how evenly the error is distributed
* across the domain. We should prefer using a measure that emphasizes larger errors and compensates for the
* magnitude of the predicted values.
*/
public double overlap (Distribution other) {
// TODO This min is not necessary. The overlap is by definition fully within the domain of either Distribution.
int iMin = Math.min(this.skip, other.skip);
int iMax = Math.max(this.fullWidth(), other.fullWidth());
double sum = 0;
Expand All @@ -216,12 +241,45 @@ public double overlap (Distribution other) {
return sum;
}

/**
* An ad-hoc measure of goodness of fit vaguely related to Pearson's chi-squared or root-mean-square error.
* Traditional measures used in fitting probability distributions like Pearson's have properties that deal poorly
* with our need to tolerate small horizontal shifts
* in the results (due to the destination grid being not precisely aligned with our street corner grid).
* Another way to deal with this would be to ensure there is no horizontal shift, by measuring travel times at
* exactly the right places instead of on a grid.
*/
public double weightedSquaredError (Distribution other) {
double sum = 0;
// This is kind of ugly because we're only examining bins with nonzero probability (to avoid div by zero).
// Observed data in a region with predicted zero probability should be an automatic fail for the model.
for (int i = this.skip; i < this.fullWidth(); i++) {
double pe = this.probabilityOf(i);
double po = other.probabilityOf(i);
Preconditions.checkState(pe >= 0); // Ensure non-negative for good measure.
if (pe == 0) {
System.out.println("Zero (expected probability; skipping.");
continue;
}
// Errors are measured relative to the expected values, and stronger deviations emphasized by squaring.
// Measuring relative to expected density compensates for the case where it is spread over a wider domain.
sum += pow(po - pe, 2) / pe;
}
System.out.println("Squared error: " + sum);
System.out.println("Root Squared error: " + sqrt(sum));
return sum;
}

public void assertSimilar (Distribution observed) {
double squaredError = this.weightedSquaredError(observed);
showChartsIfEnabled(observed);
assertTrue(squaredError < 0.025, String.format("Error metric too high at at %3f", squaredError));
}

public void showChartsIfEnabled (Distribution observed) {
if (SHOW_CHARTS) {
DistributionChart.showChart(this, observed);
}
double overlapPercent = this.overlap(observed) * 100;
assertTrue(overlapPercent >= 95, String.format("Overlap less than 95%% at %3f", overlapPercent));
}

// This is ugly, it should be done some other way e.g. firstNonzero
Expand Down Expand Up @@ -249,4 +307,19 @@ public void trim () {
}
masses = Arrays.copyOfRange(masses, firstNonzero, lastNonzero + 1);
}

/**
* Here we are performing two related checks for a bit of redundancy and to check different parts of the system:
* checking percentiles drawn from the observed distribution, as well as the full histogram of the distribution.
* This double comparison could be done automatically with a method like Distribution.assertSimilar(TravelTimeResult).
* @param destination the flattened 1D index into the pointset, which will be zero for single freeform points.
*/
public void multiAssertSimilar(TravelTimeResult travelTimes, int destination) {
// Check a goodness-of-fit metric on the observed distribution relative to this distribution.
Distribution observed = Distribution.fromTravelTimeResult(travelTimes, destination);
this.assertSimilar(observed);
// Check that percentiles extracted from observed are similar to those predicted by this distribution.
int[] travelTimePercentiles = travelTimes.getTarget(destination);
DistributionTester.assertExpectedDistribution(this, travelTimePercentiles);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,13 @@ public JFreeChart createChart (Distribution... distributions) {
return chart;
}

// Note that the points are placed at the minute boundary, though the numbers represent densities over one minute.
// They should probably be represented as filled bars across the minute or as points midway across the minute.
private static TimeSeriesCollection createTimeSeriesDataset (Distribution... distributions) {
TimeSeriesCollection dataset = new TimeSeriesCollection();
int d = 0;
for (Distribution distribution : distributions) {
TimeSeries ts = new TimeSeries("X");
TimeSeries ts = new TimeSeries("Distribution " + (d++));
for (int i = distribution.skip(); i < distribution.fullWidth(); i++) {
double p = distribution.probabilityOf(i);
ts.add(new Minute(i, 0, 1, 1, 2000), p);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ public static void assertUniformlyDistributed (int[] sortedPercentiles, int min,
}
}

/**
* Given an expected distribution of travel times at a destination and the standard five percentiles of travel time
* at that same destination as computed by our router, check that the computed values seem to be drawn from the
* theoretically correct distribution.
*/
public static void assertExpectedDistribution (Distribution expectedDistribution, int[] values) {
for (int p = 0; p < PERCENTILES.length; p++) {
int expected = expectedDistribution.findPercentile(PERCENTILES[p]);
Expand Down
35 changes: 0 additions & 35 deletions src/test/java/com/conveyal/r5/analyst/network/GridLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -169,47 +169,12 @@ public GridSinglePointTaskBuilder newTaskBuilder() {
return new GridSinglePointTaskBuilder(this);
}

public GridSinglePointTaskBuilder copyTask(AnalysisWorkerTask task) {
return new GridSinglePointTaskBuilder(this, task);
}

/** Get the minimum envelope containing all the points in this grid. */
public Envelope gridEnvelope () {
Coordinate farCorner = getIntersectionLatLon(widthAndHeightInBlocks, widthAndHeightInBlocks);
return new Envelope(originPoint.x, farCorner.x, originPoint.y, farCorner.y);
}

/**
* Create a gridded pointset coextensive with this grid, with the given number of opportunities in each cell.
* TODO maybe derive this from the network's gridded pointset rather than the GridLayout.
*/
public Grid makeUniformOpportunityDataset (double density) {
Grid grid = new Grid(DEFAULT_ZOOM, this.gridEnvelope());
for (double[] column : grid.grid) {
Arrays.fill(column, density);
}
return grid;
}

/**
* Create a gridded pointset coextensive with this grid, with the given number of opportunities in each cell in the
* eastern half of the grid.
*/
public Grid makeRightHalfOpportunityDataset (double density) {
Grid grid = new Grid(DEFAULT_ZOOM, this.gridEnvelope());
for (int c = grid.grid.length / 2; c < grid.grid.length; c++) {
Arrays.fill(grid.grid[c], density);
}
return grid;
}

public int pointIndex(AnalysisWorkerTask task, int x, int y) {
Coordinate destLatLon = this.getIntersectionLatLon(x, y);
// Here is a bit of awkwardness where WebMercatorGridPointSet and Grid both extend PointSet, but don't share
// their grid referencing code, so one would have to be converted to the other to get the point index.
return new WebMercatorGridPointSet(task.getWebMercatorExtents()).getPointIndexContaining(destLatLon);
}

public String nextIntegerId() {
return Integer.toString(nextIntegerId++);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class GridRoute {
public Orientation orientation;
public boolean bidirectional;

/** Explicit departure times from first stop; if set, startHour and endHour will be ignored*/
/** Explicit departure times from first stop; if set, startHour and endHour will be ignored. */
public int[] startTimes;

/** Override default hop times. Map of (trip, stopAtStartOfHop) to factor by which default hop is multiplied. */
Expand Down
Loading
Loading