diff --git a/README.md b/README.md
index a210c74ec..fdd5c00e7 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,9 @@ We refer to the routing method as "realistic" because it works by planning door-
 We say "Real-world and Reimagined" networks because R5's networks are built from widely available open OSM and GTFS data describing baseline transportation systems, but R5 includes a system for applying light-weight patches to those networks for immediate, interactive scenario comparison.
 
-**Please note** that the Conveyal team does not provide technical support for third-party deployments of its analysis platform. We provide paid subscriptions to a cloud-based deployment of this system, which performs these complex calculations hundreds of times faster using a compute cluster. This project is open source primarily to ensure transparency and reproducibility in public planning and decision making processes, and in hopes that it may help researchers, students, and potential collaborators to understand and build upon our methodology.
+**Please note** that the Conveyal team does not provide technical support for third-party deployments. R5 is a component of a specialized commercial system, and we align development efforts with our roadmap and the needs of subscribers to our hosted service. This service is designed to facilitate secure online collaboration, user-friendly data management and scenario editing through a web interface, and complex calculations performed hundreds of times faster using a compute cluster. These design goals may not align well with other use cases. This project is open source primarily to ensure transparency and reproducibility in public planning and decision-making processes, and in hopes that it may help researchers, students, and potential collaborators to understand and build upon our methodology.
+
+While the Conveyal team provides ongoing support and compatibility to subscribers, third-party projects using R5 as a library may not work with future releases. R5 does not currently expose a stable programming interface ("API" or "SDK"). As we release new features, previous functions and data types may change. The practical effect is that third-party wrappers or language bindings (e.g., for R or Python) may need to continue using an older release of R5 for feature compatibility (though not necessarily result compatibility, as the methods used in R5 are now relatively mature).
 
 ## Methodology
@@ -19,7 +21,7 @@ For details on the core methods implemented in Conveyal Analysis and R5, see:
 
 ### Citations
 
-The Conveyal team is always eager to see cutting-edge uses of our software, so feel free to send us a copy of any thesis, report, or paper produced using this software. We also ask that any academic publications using this software cite the papers above, where relevant and appropriate.
+The Conveyal team is always eager to see cutting-edge uses of our software, so feel free to send us a copy of any thesis, report, or paper produced using this software. We also ask that any academic or research publications using this software cite the papers above, where relevant and appropriate.
 
 ## Configuration
 
@@ -52,7 +54,7 @@ By default, IntelliJ will follow common Gradle practice and build R5 using the "
 
 ## Structured Commit Messages
 
-We use structured commit messages to help generate changelogs and determine version numbers.
+We use structured commit messages to help generate changelogs.
 
 The first line of these messages is in the following format: `<type>(<scope>): <summary>`
 
@@ -68,10 +70,6 @@ The `<scope>` is optional and is often a class name. The `<type>` should be
 - devops: Changes to code that only affect deployment, logging, etc. No changes to user code.
 - chore: Any other changes causing no changes to user code.
 
-The body of the commit message (if any) should begin after one blank line. If the commit meets the definition of a major version change according to semantic versioning (e.g. a change in API visible to an external module), the commit message body should begin with `BREAKING CHANGE: `.
-
-Presence of a `fix` commit in a release should increment the number in the third (PATCH) position.
-Presence of a `feat` commit in a release should increment the number in the second (MINOR) position.
-Presence of a `BREAKING CHANGE` commit in a release should increment the number in the first (MAJOR) position.
+The body of the commit message (if any) should begin after one blank line.
 
-This is based on https://www.conventionalcommits.org.
+From 2018 to 2020, we used major/minor/patch release numbering as suggested by https://www.conventionalcommits.org. Starting in 2021, we switched to major/minor release numbering, incrementing the minor version with regular feature releases and the major version only when there are substantial changes to the cluster computing components of our system. Because there is no public API at this time, the conventional definition of breaking changes under semantic versioning does not apply.
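For illustration, a commit message following this convention might look like the following (the scope and summary are hypothetical, not taken from this changeset):

```
feat(TaskScheduler): allow clearing completed tasks via the API

Adds per-user task lookup and removal so the UI can clear finished
entries from the activity list.
```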
diff --git a/src/main/java/com/conveyal/analysis/components/TaskScheduler.java b/src/main/java/com/conveyal/analysis/components/TaskScheduler.java
index 53ba806d3..9cac27fc1 100644
--- a/src/main/java/com/conveyal/analysis/components/TaskScheduler.java
+++ b/src/main/java/com/conveyal/analysis/components/TaskScheduler.java
@@ -168,11 +168,39 @@ public final void run () {
             List<ApiTask> apiTaskList = tasks.stream()
                     .map(Task::toApiTask)
                     .collect(Collectors.toUnmodifiableList());
-            tasks.removeIf(t -> t.durationComplete().getSeconds() > 60);
+            // Purge old tasks.
+            tasks.removeIf(t -> t.durationComplete().toDays() > 3);
             return apiTaskList;
         }
     }
 
+    /**
+     * Return a single task. Does not purge old tasks.
+     */
+    public Task getTaskForUser (String userEmail, String taskId) {
+        synchronized (tasksForUser) {
+            Set<Task> tasks = tasksForUser.get(userEmail);
+            if (tasks == null) return null;
+            for (Task t : tasks) {
+                if (t.id.toString().equals(taskId)) {
+                    return t;
+                }
+            }
+            return null;
+        }
+    }
+
+    /**
+     * Remove a task. Returns false if the task does not exist. Returns true if the task exists and is properly removed.
+     */
+    public boolean removeTaskForUser (String userEmail, String taskId) {
+        synchronized (tasksForUser) {
+            Set<Task> tasks = tasksForUser.get(userEmail);
+            if (tasks == null) return false;
+            return tasks.removeIf(t -> t.id.toString().equals(taskId));
+        }
+    }
+
     // Q: Should the caller ever create its own Tasks, or are tasks created here inside the TaskScheduler from
     // other raw information? Having the caller creating a Task seems like a good way to configure execution details
     // like heavy/light/periodic, and submit user information without passing it in. That task request could be separate
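A minimal sketch of how a caller might use the two new methods. This is hypothetical caller code, not part of the changeset; it assumes only the `TaskScheduler` and `Task` APIs shown above.

```java
import com.conveyal.analysis.components.TaskScheduler;
import com.conveyal.r5.analyst.progress.Task;

/** Hypothetical caller, for illustration only. */
class TaskCleanupSketch {
    static void clearIfFinished (TaskScheduler taskScheduler, String email, String taskId) {
        // getTaskForUser returns null for unknown ids, so guard before touching state.
        Task task = taskScheduler.getTaskForUser(email, taskId);
        if (task != null && !task.state.equals(Task.State.ACTIVE)) {
            // removeTaskForUser returns false if another request already cleared the task.
            boolean removed = taskScheduler.removeTaskForUser(email, taskId);
        }
    }
}
```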
diff --git a/src/main/java/com/conveyal/analysis/controllers/BundleController.java b/src/main/java/com/conveyal/analysis/controllers/BundleController.java
index 9e08505d2..269189407 100644
--- a/src/main/java/com/conveyal/analysis/controllers/BundleController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/BundleController.java
@@ -16,6 +16,7 @@
 import com.conveyal.gtfs.error.GTFSError;
 import com.conveyal.gtfs.error.GeneralError;
 import com.conveyal.gtfs.model.Stop;
+import com.conveyal.gtfs.validator.PostLoadValidator;
 import com.conveyal.osmlib.Node;
 import com.conveyal.osmlib.OSM;
 import com.conveyal.r5.analyst.progress.ProgressInputStream;
@@ -199,6 +200,9 @@ private Bundle create (Request req, Response res) {
                     feed.progressListener = progressListener;
                     feed.loadFromFile(zipFile, new ObjectId().toString());
 
+                    // Perform any more complex validation that requires cross-table checks.
+                    new PostLoadValidator(feed).validate();
+
                     // Find and validate the extents of the GTFS, defined by all stops in the feed.
                     for (Stop s : feed.stops.values()) {
                         bundleBounds.expandToInclude(s.stop_lon, s.stop_lat);
diff --git a/src/main/java/com/conveyal/analysis/controllers/GtfsController.java b/src/main/java/com/conveyal/analysis/controllers/GtfsController.java
index cc978131d..1e2e55f05 100644
--- a/src/main/java/com/conveyal/analysis/controllers/GtfsController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/GtfsController.java
@@ -151,7 +151,8 @@ static class StopApiResponse extends BaseApiResponse {
         }
     }
     /**
-     * Return StopApiResponse values for GTFS stops (location_type = 0) in a single feed
+     * Return StopApiResponse values for GTFS stops (location_type = 0) in a single feed.
+     * All other location_types (station, entrance, generic node, boarding area) are skipped.
      */
     private List<StopApiResponse> getAllStopsForOneFeed(Request req, Response res) {
         GTFSFeed feed = getFeedFromRequest(req);
@@ -160,8 +161,8 @@ private List<StopApiResponse> getAllStopsForOneFeed(Request req, Response res) {
     }
 
     /**
-     * Groups the feedId and stops (location_type = 0; not parent stations, entrances/exits, generic nodes, etc.) for a
-     * given GTFS feed
+     * Compound return type for the feedId and stops with location_type = 0 for a given GTFS feed.
+     * All other location_types (station, entrance, generic node, boarding area) are skipped.
      */
     static class FeedGroupStopsApiResponse {
         public final String feedId;
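A sketch of the filtering these javadoc comments describe, assuming only the `GTFSFeed.stops` map and the `Stop.location_type` field shown elsewhere in this diff; the wrapper class is invented for illustration.

```java
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.model.Stop;
import java.util.List;
import java.util.stream.Collectors;

class StopFilterSketch {
    /** Keep only true stops (location_type = 0), skipping stations, entrances, generic nodes, and boarding areas. */
    static List<Stop> stopsOnly (GTFSFeed feed) {
        return feed.stops.values().stream()
                .filter(s -> s.location_type == 0)
                .collect(Collectors.toList());
    }
}
```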
diff --git a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
index 012309ee1..cd2932c03 100644
--- a/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/OpportunityDatasetController.java
@@ -24,7 +24,6 @@
 import com.conveyal.r5.analyst.progress.NoopProgressListener;
 import com.conveyal.r5.analyst.progress.Task;
 import com.conveyal.r5.analyst.progress.WorkProduct;
-import com.conveyal.r5.analyst.progress.WorkProductType;
 import com.conveyal.r5.util.ExceptionUtils;
 import com.conveyal.r5.util.InputStreamProvider;
 import com.conveyal.r5.util.ProgressListener;
@@ -32,10 +31,6 @@
 import com.google.common.io.Files;
 import com.mongodb.QueryBuilder;
 import org.apache.commons.fileupload.FileItem;
-import org.apache.commons.fileupload.FileItemFactory;
-import org.apache.commons.fileupload.FileUploadException;
-import org.apache.commons.fileupload.disk.DiskFileItemFactory;
-import org.apache.commons.fileupload.servlet.ServletFileUpload;
 import org.apache.commons.io.FilenameUtils;
 import org.bson.types.ObjectId;
 import org.slf4j.Logger;
@@ -210,7 +205,7 @@ private void updateAndStoreDatasets (SpatialDataSource source,
             if (dataset.format == FileStorageFormat.FREEFORM) {
                 dataset.name = String.join(" ", pointSet.name, "(freeform)");
             }
-            dataset.setWebMercatorExtents(pointSet);
+            dataset.setWebMercatorExtents(pointSet.getWebMercatorExtents());
 
             // TODO make origin and destination pointsets reference each other and indicate they are suitable
             //      for one-to-one analyses
@@ -327,7 +322,7 @@ private OpportunityDatasetUploadStatus createOpportunityDataset(Request req, Res
         // Parse required fields. Will throw a ServerException on failure.
         final String sourceName = HttpUtils.getFormField(formFields, "Name", true);
         final String regionId = HttpUtils.getFormField(formFields, "regionId", true);
-        final int zoom = parseZoom(HttpUtils.getFormField(formFields, "zoom", false));
+        final int zoom = parseZoom(HttpUtils.getFormField(formFields, "zoom", true));
 
         // Create a region-wide status object tracking the processing of opportunity data.
         // Create the status object before doing anything including input and parameter validation, so that any problems
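With `zoom` now a required form field, every upload request must supply it. A hypothetical example; the endpoint path, hostname, id value, and file field name are assumptions for illustration, not confirmed by this diff, and only the `Name`, `regionId`, and `zoom` field names appear in the code above.

```
curl -X POST https://analysis.example.com/api/opportunities \
  -F "Name=Jobs 2020" \
  -F "regionId=5f2ab1c2d3e4f5a6b7c8d9e0" \
  -F "zoom=10" \
  -F "files=@jobs_2020.csv"
```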
diff --git a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
index 60197114b..32336fd7c 100644
--- a/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/RegionalAnalysisController.java
@@ -4,7 +4,6 @@
 import com.conveyal.analysis.SelectingGridReducer;
 import com.conveyal.analysis.UserPermissions;
 import com.conveyal.analysis.components.broker.Broker;
-import com.conveyal.analysis.components.broker.Job;
 import com.conveyal.analysis.components.broker.JobStatus;
 import com.conveyal.analysis.models.AnalysisRequest;
 import com.conveyal.analysis.models.OpportunityDataset;
@@ -33,7 +32,6 @@
 import java.io.File;
 import java.io.FileInputStream;
-import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -402,7 +400,7 @@ private RegionalAnalysis createRegionalAnalysis (Request req, Response res) thro
                 );
             } else {
                 checkArgument(
-                    dataset.getWebMercatorExtents().zoom == opportunityDatasets.get(0).getWebMercatorExtents().zoom,
+                    dataset.zoom == opportunityDatasets.get(0).zoom,
                     "If multiple grids are specified as destinations, they must have identical resolutions (web mercator zoom levels)."
                 );
             }
diff --git a/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java b/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java
index ac7b2ecb4..c2beffd30 100644
--- a/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java
+++ b/src/main/java/com/conveyal/analysis/controllers/UserActivityController.java
@@ -1,15 +1,15 @@
 package com.conveyal.analysis.controllers;
 
+import com.conveyal.analysis.AnalysisServerException;
 import com.conveyal.analysis.UserPermissions;
 import com.conveyal.analysis.components.TaskScheduler;
 import com.conveyal.r5.analyst.progress.ApiTask;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
+import com.conveyal.r5.analyst.progress.Task;
+import com.google.common.collect.ImmutableMap;
 import spark.Request;
 import spark.Response;
 import spark.Service;
 
-import java.util.ArrayList;
 import java.util.List;
 
 import static com.conveyal.analysis.util.JsonUtil.toJson;
@@ -40,6 +40,7 @@ public UserActivityController (TaskScheduler taskScheduler) {
     @Override
     public void registerEndpoints (Service sparkService) {
         sparkService.get("/api/activity", this::getActivity, toJson);
+        sparkService.delete("/api/activity/:id", this::removeActivity, toJson);
     }
 
     private ResponseModel getActivity (Request req, Response res) {
@@ -53,6 +54,25 @@ private ResponseModel getActivity (Request req, Response res) {
         return responseModel;
     }
 
+    private Object removeActivity (Request req, Response res) {
+        UserPermissions userPermissions = UserPermissions.from(req);
+        String id = req.params("id");
+        Task task = taskScheduler.getTaskForUser(userPermissions.email, id);
+        // getTaskForUser returns null for unknown ids; reject before dereferencing.
+        if (task == null) {
+            throw AnalysisServerException.badRequest("Task not found.");
+        }
+        // Disallow removing active tasks via the API.
+        if (task.state.equals(Task.State.ACTIVE)) {
+            throw AnalysisServerException.badRequest("Cannot clear an active task.");
+        }
+        if (taskScheduler.removeTaskForUser(userPermissions.email, id)) {
+            return ImmutableMap.of("message", "Successfully cleared task.");
+        } else {
+            throw AnalysisServerException.badRequest("Failed to clear task.");
+        }
+    }
+
     /** API model used only to structure activity JSON messages sent back to UI. */
     public static class ResponseModel {
         /** For example: "Server going down at 17:00 GMT for maintenance" or "Working to resolve known issue [link]." */
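For illustration, the endpoints registered above could be exercised as follows. The hostname and the task id are placeholders, and any authentication headers a real deployment requires are omitted.

```
# List background activity for the authenticated user.
curl https://analysis.example.com/api/activity

# Clear one finished (non-active) task by id; unknown or still-active tasks get a 400 response.
curl -X DELETE https://analysis.example.com/api/activity/6463f0e85a1c3d0f9aa1b2c3
```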
diff --git a/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java b/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java
index 76ca0177f..dd16c11a1 100644
--- a/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java
+++ b/src/main/java/com/conveyal/analysis/datasource/derivation/AggregationAreaDerivation.java
@@ -22,8 +22,6 @@
 import org.locationtech.jts.geom.Geometry;
 import org.locationtech.jts.operation.union.UnaryUnionOp;
 import org.opengis.feature.simple.SimpleFeature;
-import org.opengis.referencing.FactoryException;
-import org.opengis.referencing.operation.TransformException;
 import spark.Request;
 
 import java.io.File;
@@ -37,7 +35,6 @@
 import java.util.stream.Collectors;
 import java.util.zip.GZIPOutputStream;
 
-import static com.conveyal.file.FileStorageFormat.GEOJSON;
 import static com.conveyal.file.FileStorageFormat.SHP;
 import static com.conveyal.r5.analyst.WebMercatorGridPointSet.parseZoom;
 import static com.conveyal.r5.analyst.progress.WorkProductType.AGGREGATION_AREA;
@@ -175,7 +172,7 @@ private static void prefetchDataSource (String baseName, String extension, FileS
     public void action (ProgressListener progressListener) throws Exception {
 
         ArrayList<AggregationArea> aggregationAreas = new ArrayList<>();
-        String groupDescription = "Aggregation areas from polygons";
+        String groupDescription = "z" + this.zoom + ": aggregation areas";
         DataGroup dataGroup = new DataGroup(userPermissions, spatialDataSource._id.toString(), groupDescription);
 
         progressListener.beginTask("Reading data source", finalFeatures.size() + 1);
@@ -209,7 +206,7 @@ public void action (ProgressListener progressListener) throws Exception {
                 maskGrid.grid[pixel.x][pixel.y] = pixel.weight * 100_000;
             });
 
-            AggregationArea aggregationArea = new AggregationArea(userPermissions, name, spatialDataSource);
+            AggregationArea aggregationArea = new AggregationArea(userPermissions, name, spatialDataSource, zoom);
 
             try {
                 File gridFile = FileUtils.createScratchFile("grid");
diff --git a/src/main/java/com/conveyal/analysis/models/AggregationArea.java b/src/main/java/com/conveyal/analysis/models/AggregationArea.java
index a0a34be19..d2d62e89b 100644
--- a/src/main/java/com/conveyal/analysis/models/AggregationArea.java
+++ b/src/main/java/com/conveyal/analysis/models/AggregationArea.java
@@ -22,13 +22,16 @@ public class AggregationArea extends BaseModel {
     public String dataSourceId;
     public String dataGroupId;
 
+    public int zoom;
+
     /** Zero-argument constructor required for Mongo automatic POJO deserialization. */
     public AggregationArea () { }
 
-    public AggregationArea(UserPermissions user, String name, SpatialDataSource dataSource) {
+    public AggregationArea(UserPermissions user, String name, SpatialDataSource dataSource, int zoom) {
         super(user, name);
         this.regionId = dataSource.regionId;
         this.dataSourceId = dataSource._id.toString();
+        this.zoom = zoom;
    }
 
     @JsonIgnore
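The new `zoom` fields tie these records to a web Mercator grid resolution. As background, here is a self-contained sketch of the standard web Mercator math; it is general background using textbook formulas, not R5's own `WebMercatorExtents` API, whose method names this diff does not show.

```java
class WebMercatorSketch {
    /** At zoom z the world is 256 * 2^z pixels wide, so each extra zoom level doubles linear resolution. */
    static int worldWidthPixels (int zoom) {
        return 256 * (1 << zoom);
    }

    /** Standard web Mercator x pixel for a longitude in degrees at the given zoom. */
    static double lonToPixel (double lonDegrees, int zoom) {
        return (lonDegrees + 180.0) / 360.0 * worldWidthPixels(zoom);
    }

    /** Standard web Mercator y pixel for a latitude in degrees at the given zoom. */
    static double latToPixel (double latDegrees, int zoom) {
        double latRad = Math.toRadians(latDegrees);
        return (1 - Math.log(Math.tan(latRad) + 1 / Math.cos(latRad)) / Math.PI) / 2 * worldWidthPixels(zoom);
    }
}
```

This is why the regional analysis check above can compare `zoom` values directly: two grids at different zooms have pixel sizes differing by a power of two and cannot be aligned cell-for-cell.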
diff --git a/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java b/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java
index 515abc5a9..5ad1128e8 100644
--- a/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java
+++ b/src/main/java/com/conveyal/analysis/models/DataSourceValidationIssue.java
@@ -13,6 +13,11 @@
 
     public enum Level { ERROR, WARN, INFO }
 
+    /** Zero-argument constructor required for MongoDB driver automatic POJO deserialization. */
+    public DataSourceValidationIssue () {
+
+    }
+
     public DataSourceValidationIssue (Level level, String description) {
         this.level = level;
         this.description = description;
diff --git a/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java b/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java
index 773c4efd2..ecf66134b 100644
--- a/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java
+++ b/src/main/java/com/conveyal/analysis/models/OpportunityDataset.java
@@ -2,12 +2,10 @@
 
 import com.conveyal.file.FileStorageFormat;
 import com.conveyal.file.FileStorageKey;
-import com.conveyal.r5.analyst.PointSet;
 import com.conveyal.r5.analyst.WebMercatorExtents;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 
 import static com.conveyal.file.FileCategory.GRIDS;
-import static com.conveyal.r5.analyst.WebMercatorGridPointSet.DEFAULT_ZOOM;
 
 /**
  * A model object for storing metadata about opportunity datasets in Mongo, for sharing it with the frontend.
@@ -34,13 +32,18 @@ public class OpportunityDataset extends Model {
     public String bucketName;
 
     /**
-     * Bounds in web Mercator pixels. Note that no zoom level is specified here, it's fixed to a constant 9.
+     * Bounds in web Mercator pixels.
      */
     public int north;
     public int west;
     public int width;
     public int height;
 
+    /**
+     * The zoom level this opportunity dataset was rasterized at.
+     */
+    public int zoom;
+
     /**
      * Total number of opportunities in the dataset, i.e. the sum of all opportunity counts at all points / grid cells.
     * It appears the UI doesn't use this now, but it could. We might want to remove it.
@@ -103,21 +106,16 @@ public FileStorageKey getStorageKey (FileStorageFormat fileFormat) {
     }
 
     @JsonIgnore
-    public WebMercatorExtents getWebMercatorExtents () {
-        return new WebMercatorExtents(west, north, width, height, DEFAULT_ZOOM);
-    }
-
-    @JsonIgnore
-    public void setWebMercatorExtents (PointSet pointset) {
+    public void setWebMercatorExtents (WebMercatorExtents extents) {
         // These bounds are currently in web Mercator pixels, which are relevant to Grids but are not natural units
         // for FreeformPointSets. There are only unique minimal web Mercator bounds for FreeformPointSets if
-        // the zoom level is fixed in OpportunityDataset (FIXME we may change this soon).
+        // the zoom level is fixed in OpportunityDataset.
         // Perhaps these metadata bounds should be WGS84 instead, it depends how the UI uses them.
-        WebMercatorExtents extents = pointset.getWebMercatorExtents();
         this.west = extents.west;
         this.north = extents.north;
         this.width = extents.width;
         this.height = extents.height;
+        this.zoom = extents.zoom;
     }
 
     /** Analysis region this dataset was uploaded in. */
diff --git a/src/main/java/com/conveyal/gtfs/GTFSCache.java b/src/main/java/com/conveyal/gtfs/GTFSCache.java
index 74ae4a483..6a5b27cbd 100644
--- a/src/main/java/com/conveyal/gtfs/GTFSCache.java
+++ b/src/main/java/com/conveyal/gtfs/GTFSCache.java
@@ -220,6 +220,12 @@ private void buildStopsIndex (String bundleScopedFeedId, STRtree tree) {
         final GTFSFeed feed = this.get(bundleScopedFeedId);
         LOG.info("{}: indexing {} stops", feed.feedId, feed.stops.size());
         for (Stop stop : feed.stops.values()) {
+            // To match the existing GTFS API, include only stop objects that have location_type 0.
+            // All other location_types (station, entrance, generic node, boarding area) are skipped.
+            // Missing (NaN) coordinates will confuse the spatial index and cascade hundreds of errors.
+            if (stop.location_type != 0 || !Double.isFinite(stop.stop_lat) || !Double.isFinite(stop.stop_lon)) {
+                continue;
+            }
             Envelope stopEnvelope = new Envelope(stop.stop_lon, stop.stop_lon, stop.stop_lat, stop.stop_lat);
             Point point = GeometryUtils.geometryFactory.createPoint(new Coordinate(stop.stop_lon, stop.stop_lat));
diff --git a/src/main/java/com/conveyal/gtfs/GTFSFeed.java b/src/main/java/com/conveyal/gtfs/GTFSFeed.java
index 49b4840bd..f1f380601 100644
--- a/src/main/java/com/conveyal/gtfs/GTFSFeed.java
+++ b/src/main/java/com/conveyal/gtfs/GTFSFeed.java
@@ -20,7 +20,6 @@
 import com.conveyal.gtfs.model.Transfer;
 import com.conveyal.gtfs.model.Trip;
 import com.conveyal.gtfs.validator.service.GeoUtils;
-import com.conveyal.r5.analyst.progress.ProgressInputStream;
 import com.conveyal.r5.analyst.progress.ProgressListener;
 import com.google.common.collect.HashMultimap;
 import com.google.common.collect.Iterables;
@@ -48,7 +47,6 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.nio.file.Files;
 import java.time.LocalDate;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
@@ -225,20 +223,22 @@ public void loadFromFile(ZipFile zip, String fid) throws Exception {
         // calendars and calendar dates are joined into services. This means a lot of manipulating service objects as
         // they are loaded; since mapdb keys/values are immutable, load them in memory then copy them to MapDB once
         // we're done loading them
-        Map<String, Service> serviceTable = new HashMap<>();
-        new Calendar.Loader(this, serviceTable).loadTable(zip);
-        new CalendarDate.Loader(this, serviceTable).loadTable(zip);
-        this.services.putAll(serviceTable);
-        // Joined Services have been persisted to MapDB. Release in-memory HashMap for garbage collection.
-        serviceTable = null;
+        {
+            Map<String, Service> serviceTable = new HashMap<>();
+            new Calendar.Loader(this, serviceTable).loadTable(zip);
+            new CalendarDate.Loader(this, serviceTable).loadTable(zip);
+            this.services.putAll(serviceTable);
+            // Joined Services have been persisted to MapDB. In-memory HashMap goes out of scope for garbage collection.
+        }
 
         // Joining is performed for Fares as for Services above.
-        Map<String, Fare> fares = new HashMap<>();
-        new FareAttribute.Loader(this, fares).loadTable(zip);
-        new FareRule.Loader(this, fares).loadTable(zip);
-        this.fares.putAll(fares);
-        // Joined Fares have been persisted to MapDB. Release in-memory HashMap for garbage collection.
-        fares = null;
+        {
+            Map<String, Fare> fares = new HashMap<>();
+            new FareAttribute.Loader(this, fares).loadTable(zip);
+            new FareRule.Loader(this, fares).loadTable(zip);
+            this.fares.putAll(fares);
+            // Joined Fares have been persisted to MapDB. In-memory HashMap goes out of scope for garbage collection.
+        }
 
         // Comment out the StopTime and/or ShapePoint loaders for quick testing on large feeds.
         new Route.Loader(this).loadTable(zip);
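The GTFSFeed change above replaces null-assignment of temporary maps with anonymous blocks, so the references simply go out of scope. A standalone sketch of the same pattern, with names invented for illustration:

```java
import java.util.HashMap;
import java.util.Map;

class ScopeForGcSketch {
    static void load () {
        {
            // The map is only reachable inside this block. When the block exits,
            // the reference disappears and the map becomes eligible for garbage
            // collection with no `table = null;` assignment needed.
            Map<String, String> table = new HashMap<>();
            table.put("k", "v");
            // ... copy contents to durable storage here ...
        }
        // `table` is not in scope here: the compiler now enforces what the old
        // null-assignment only suggested.
    }
}
```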
diff --git a/src/main/java/com/conveyal/gtfs/error/ForbiddenFieldError.java b/src/main/java/com/conveyal/gtfs/error/ForbiddenFieldError.java
new file mode 100644
index 000000000..4f87476c3
--- /dev/null
+++ b/src/main/java/com/conveyal/gtfs/error/ForbiddenFieldError.java
@@ -0,0 +1,22 @@
+package com.conveyal.gtfs.error;
+
+import com.conveyal.gtfs.validator.model.Priority;
+
+import java.io.Serializable;
+
+/** Indicates that a field considered forbidden is present in a GTFS feed on a particular line. */
+public class ForbiddenFieldError extends GTFSError implements Serializable {
+    public static final long serialVersionUID = 1L;
+
+    public ForbiddenFieldError (String file, long line, String field) {
+        super(file, line, field);
+    }
+
+    @Override public String getMessage() {
+        return "A value was supplied for a forbidden column.";
+    }
+
+    @Override public Priority getPriority() {
+        return Priority.MEDIUM;
+    }
+}
diff --git a/src/main/java/com/conveyal/gtfs/model/Calendar.java b/src/main/java/com/conveyal/gtfs/model/Calendar.java
index b6d44aa56..e0f0be87f 100644
--- a/src/main/java/com/conveyal/gtfs/model/Calendar.java
+++ b/src/main/java/com/conveyal/gtfs/model/Calendar.java
@@ -41,7 +41,8 @@ public Loader(GTFSFeed feed, Map<String, Service> services) {
 
         @Override
         protected boolean isRequired() {
-            return true;
+            // One of calendar or calendar_dates must be present - check in more flexible modular validation later.
+            return false;
         }
 
         @Override
diff --git a/src/main/java/com/conveyal/gtfs/model/CalendarDate.java b/src/main/java/com/conveyal/gtfs/model/CalendarDate.java
index 550dd6467..081f4c8b3 100644
--- a/src/main/java/com/conveyal/gtfs/model/CalendarDate.java
+++ b/src/main/java/com/conveyal/gtfs/model/CalendarDate.java
@@ -41,6 +41,7 @@ public Loader(GTFSFeed feed, Map<String, Service> services) {
 
         @Override
         protected boolean isRequired() {
+            // One of calendar or calendar_dates must be present - check in more flexible modular validation later.
             return false;
         }
 
diff --git a/src/main/java/com/conveyal/gtfs/model/Entity.java b/src/main/java/com/conveyal/gtfs/model/Entity.java
index 7e3800778..c445f2a41 100644
--- a/src/main/java/com/conveyal/gtfs/model/Entity.java
+++ b/src/main/java/com/conveyal/gtfs/model/Entity.java
@@ -208,6 +208,7 @@ protected URL getUrlField(String column, boolean required) throws IOException {
         return url;
     }
 
+    /** @return NaN if the number is missing or cannot be parsed. */
     protected double getDoubleField(String column, boolean required, double min, double max) throws IOException {
         String str = getFieldCheckRequired(column, required);
         double val = Double.NaN;
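Because getDoubleField signals a missing or unparseable value with NaN, callers need the right test: NaN never compares equal to anything, including itself. A small illustration of the pitfall and of the checks used elsewhere in this diff:

```java
class NanCheckSketch {
    public static void main (String[] args) {
        double missing = Double.NaN;
        System.out.println(missing == Double.NaN);    // false: == never matches NaN
        System.out.println(Double.isNaN(missing));    // true: the correct missing-value test
        System.out.println(Double.isFinite(missing)); // false: also rejects +/-infinity, as in GTFSCache above
    }
}
```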
diff --git a/src/main/java/com/conveyal/gtfs/model/Stop.java b/src/main/java/com/conveyal/gtfs/model/Stop.java
index b4fa8dee2..580030bef 100644
--- a/src/main/java/com/conveyal/gtfs/model/Stop.java
+++ b/src/main/java/com/conveyal/gtfs/model/Stop.java
@@ -48,17 +48,20 @@ public void loadOneRow() throws IOException {
             s.stop_code = getStringField("stop_code", false);
             s.stop_name = getStringField("stop_name", true);
             s.stop_desc = getStringField("stop_desc", false);
-            s.stop_lat = getDoubleField("stop_lat", true, -90D, 90D);
-            s.stop_lon = getDoubleField("stop_lon", true, -180D, 180D);
             s.zone_id = getStringField("zone_id", false);
             s.stop_url = getUrlField("stop_url", false);
-            s.location_type = getIntField("location_type", false, 0, 1);
-            s.parent_station = getStringField("parent_station", false);
+            s.location_type = getIntField("location_type", false, 0, 4);
+            // 0...2 are stop, station, and entrance which must have lat and lon. Other nodes do not need coordinates.
+            boolean coord_required = s.location_type <= 2;
+            s.stop_lat = getDoubleField("stop_lat", coord_required, -90D, 90D);
+            s.stop_lon = getDoubleField("stop_lon", coord_required, -180D, 180D);
+            // Required for entrances, generic nodes, and boarding areas. Optional for stops, forbidden for stations.
+            boolean parent_station_required = s.location_type >= 2;
+            s.parent_station = getStringField("parent_station", parent_station_required);
             s.stop_timezone = getStringField("stop_timezone", false);
             s.wheelchair_boarding = getStringField("wheelchair_boarding", false);
             s.feed_id = feed.feedId;
             /* TODO check ref integrity later, this table self-references via parent_station */
-
             feed.stops.put(s.stop_id, s);
         }
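For reference, the per-location_type rules enforced by the Stop loader above and by the PostLoadValidator that follows can be summarized as (derived from this diff and the GTFS reference it cites):

| location_type | Meaning | Coordinates | parent_station | Required parent type |
|---|---|---|---|---|
| 0 | Stop or platform | required | optional | station (1) |
| 1 | Station | required | forbidden | n/a |
| 2 | Entrance/exit | required | required | station (1) |
| 3 | Generic node | optional | required | station (1) |
| 4 | Boarding area | optional | required | stop (0) |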
diff --git a/src/main/java/com/conveyal/gtfs/validator/PostLoadValidator.java b/src/main/java/com/conveyal/gtfs/validator/PostLoadValidator.java
new file mode 100644
index 000000000..303bc3537
--- /dev/null
+++ b/src/main/java/com/conveyal/gtfs/validator/PostLoadValidator.java
@@ -0,0 +1,152 @@
+package com.conveyal.gtfs.validator;
+
+import com.conveyal.gtfs.GTFSFeed;
+import com.conveyal.gtfs.error.EmptyFieldError;
+import com.conveyal.gtfs.error.ForbiddenFieldError;
+import com.conveyal.gtfs.error.GeneralError;
+import com.conveyal.gtfs.error.RangeError;
+import com.conveyal.gtfs.error.ReferentialIntegrityError;
+import com.conveyal.gtfs.model.Stop;
+
+import java.util.List;
+
+/**
+ * Currently we perform a lot of validation while we're loading the rows out of the input GTFS feed.
+ * This can only catch certain categories of problems. Other problems must be found after tables are fully loaded.
+ * These include self-referential tables like stops (which reference other stops as parent_stations).
+ * This could also be expressed as a postLoadValidation method on Entity.Loader.
+ *
+ * In the original RDBMS-enabled gtfs-lib we had a lot of validation classes that would perform checks after loading.
+ * This included more complex semantic checks of the kind we do in r5 while building networks. We might want to
+ * re-import those gtfs-lib validators and adapt them to operate on MapDB only for the purposes of r5.
+ * However, validating a feed takes a lot of sorting and grouping of stop_times that will need to be repeated when we
+ * build a network. It's debatable whether we should make the user wait twice for this, as it's one of the slower steps.
+ */
+public class PostLoadValidator {
+
+    private GTFSFeed feed;
+
+    public PostLoadValidator (GTFSFeed feed) {
+        this.feed = feed;
+    }
+
+    public void validate () {
+        validateCalendarServices();
+        validateParentStations();
+    }
+
+    /**
+     * calendar and calendar_dates are the only conditionally required tables: at least one of the two must be present.
+     * The underlying requirement is that at least some service is defined.
+     */
+    private void validateCalendarServices () {
+        if (feed.services.isEmpty()) {
+            feed.errors.add(new GeneralError("calendar.txt", 0, "service_id",
+                    "Feed does not define any services in calendar or calendar_dates."));
+        }
+    }
+
+    /**
+     * Validate location_type and parent_station constraints as well as referential integrity.
+     * Individual validation actions like this could be factored out into separate classes (PostLoadValidators),
+     * but as long as we only have two or three of them they can live together as methods in this one class.
+     */
+    private void validateParentStations() {
+        for (Stop stop : feed.stops.values()) {
+            for (ParentStationRule parentStationRule : PARENT_STATION_RULES) {
+                if (parentStationRule.check(stop, feed)) break;
+            }
+        }
+    }
+
+    private static final String FILE = "stops.txt";
+    private static final String FIELD = "parent_station";
+
+    /** GTFS location_type codes. */
+    private enum LocationType {
+        STOP(0),
+        STATION(1),
+        ENTRANCE(2),
+        GENERIC(3),
+        BOARDING(4);
+        public final int code;
+        LocationType(int code) {
+            this.code = code;
+        }
+    }
+
+    private enum Requirement {
+        OPTIONAL, REQUIRED, FORBIDDEN
+    }
+
+    /**
+     * These rules are used specifically for validating parent_stations on stops.txt entries.
+     * For now we only have two kinds of post-load validation, so these classes specific to one kind of validation
+     * are all grouped together under the PostLoadValidator class.
+     */
+    private static class ParentStationRule {
+        // The location_type to which the constraint applies.
+        LocationType fromLocationType;
+        // Whether the parent_station is required, forbidden, or optional.
+        Requirement parentRequirement;
+        // If the parent_station is present and not forbidden, what location_type it must be.
+        LocationType parentLocationType;
+
+        public ParentStationRule(
+                LocationType fromLocationType,
+                Requirement parentRequirement,
+                LocationType parentLocationType
+        ) {
+            this.fromLocationType = fromLocationType;
+            this.parentRequirement = parentRequirement;
+            this.parentLocationType = parentLocationType;
+        }
+
+        /**
+         * Call this method on rules for each location_type in turn, enforcing the rule when the supplied stop matches.
+         * @return true if this rule validated the stop, or false if more rules for other location_types must be checked.
+         */
+        public boolean check (Stop stop, GTFSFeed feed) {
+            // If this rule does not apply to this stop (does not apply to its location_type), do not perform checks.
+            if (stop.location_type != fromLocationType.code) {
+                return false;
+            }
+            if (stop.parent_station == null) {
+                if (parentRequirement == Requirement.REQUIRED) {
+                    feed.errors.add(new EmptyFieldError(FILE, stop.sourceFileLine, FIELD));
+                }
+            } else {
+                // Parent station reference was supplied.
+                if (parentRequirement == Requirement.FORBIDDEN) {
+                    feed.errors.add(new ForbiddenFieldError(FILE, stop.sourceFileLine, FIELD));
+                }
+                Stop parentStation = feed.stops.get(stop.parent_station);
+                if (parentStation == null) {
+                    feed.errors.add(new ReferentialIntegrityError(FILE, stop.sourceFileLine, FIELD, stop.parent_station));
+                } else {
+                    if (parentStation.location_type != parentLocationType.code) {
+                        feed.errors.add(new RangeError(
+                                FILE, stop.sourceFileLine, FIELD,
+                                parentLocationType.code, parentLocationType.code,
+                                parentStation.location_type)
+                        );
+                    }
+                }
+            }
+            return true;
+        }
+    }
+
+    /**
+     * Representation in code of the constraints described at https://gtfs.org/schedule/reference/#stopstxt
+     * in the subsections on location_type and parent_station.
+     */
+    private static final List<ParentStationRule> PARENT_STATION_RULES = List.of(
+            new ParentStationRule(LocationType.STOP,     Requirement.OPTIONAL,  LocationType.STATION),
+            new ParentStationRule(LocationType.STATION,  Requirement.FORBIDDEN, LocationType.STATION),
+            new ParentStationRule(LocationType.ENTRANCE, Requirement.REQUIRED,  LocationType.STATION),
+            new ParentStationRule(LocationType.GENERIC,  Requirement.REQUIRED,  LocationType.STATION),
+            new ParentStationRule(LocationType.BOARDING, Requirement.REQUIRED,  LocationType.STOP)
+    );
+
+}
diff --git a/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java b/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java
index d61f6066a..7c180ff77 100644
--- a/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java
+++ b/src/main/java/com/conveyal/r5/analyst/scenario/IndexedPolygonCollection.java
@@ -132,7 +132,7 @@ public void loadFromS3GeoJson() throws Exception {
             boolean indexThisFeature = true;
             if (id == null) {
                 allPolygonsHaveIds = false;
-            } else if (!(name instanceof String)) {
+            } else if (!(id instanceof String)) {
                 errors.add(String.format("Value '%s' of attribute '%s' of feature %d should be a string.",
                         id, idAttribute, featureNumber));
                 indexThisFeature = false;
diff --git a/src/main/java/com/conveyal/r5/common/GeometryUtils.java b/src/main/java/com/conveyal/r5/common/GeometryUtils.java
index c56c38ea4..c41eef536 100644
--- a/src/main/java/com/conveyal/r5/common/GeometryUtils.java
+++ b/src/main/java/com/conveyal/r5/common/GeometryUtils.java
@@ -23,8 +23,8 @@ public class GeometryUtils {
     /** Average of polar and equatorial radii, https://en.wikipedia.org/wiki/Earth */
     public static final double RADIUS_OF_EARTH_M = 6_367_450;
 
-    /** Maximum area allowed for the bounding box of an uploaded shapefile -- large enough for New York State. */
-    private static final double MAX_BOUNDING_BOX_AREA_SQ_KM = 250_000;
+    /** Maximum area allowed for the bounding box of uploaded files -- large enough for California. */
+    private static final double MAX_BOUNDING_BOX_AREA_SQ_KM = 975_000;
 
     /**
     * Haversine formula for distance on the sphere. We used to have a fastDistance function that would estimate this
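As a rough sanity check on the new constant, the area of a geographic bounding box can be approximated by scaling the longitude span by the cosine of the central latitude. The sketch below is a spherical approximation for illustration only, not the exact computation R5 performs (which this diff does not show); only the earth-radius constant is taken from the code above.

```java
class BboxAreaSketch {
    static final double RADIUS_OF_EARTH_M = 6_367_450;

    /** Approximate area in square kilometers of a WGS84 bounding box, treating the earth as a sphere. */
    static double areaSqKm (double minLon, double minLat, double maxLon, double maxLat) {
        double midLatRad = Math.toRadians((minLat + maxLat) / 2);
        double heightKm = Math.toRadians(maxLat - minLat) * RADIUS_OF_EARTH_M / 1000;
        double widthKm = Math.toRadians(maxLon - minLon) * Math.cos(midLatRad) * RADIUS_OF_EARTH_M / 1000;
        return widthKm * heightKm;
    }

    public static void main (String[] args) {
        // Roughly California's extent; the result is on the order of the new 975,000 km^2 limit.
        System.out.printf("%.0f km^2%n", areaSqKm(-124.4, 32.5, -114.1, 42.0));
    }
}
```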