Skip to content

Commit

Permalink
Merge branch 'dev' into force-sidewalk-use
Browse files Browse the repository at this point in the history
  • Loading branch information
abyrd committed Oct 3, 2024
2 parents 1710107 + 68c6772 commit 16f2b03
Show file tree
Hide file tree
Showing 10 changed files with 126 additions and 96 deletions.
3 changes: 2 additions & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
withSourcesJar()
}

jar {
Expand Down Expand Up @@ -164,7 +165,7 @@ dependencies {
}

// Database driver.
implementation 'org.mongodb:mongo-java-driver:3.11.0'
implementation 'org.mongodb:mongodb-driver-legacy:5.2.0'

// Legacy system for storing Java objects, this functionality is now provided by the MongoDB driver itself.
implementation 'org.mongojack:mongojack:2.10.1'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import com.conveyal.file.FileUtils;
import com.conveyal.gtfs.GTFSCache;
import com.conveyal.gtfs.GTFSFeed;
import com.conveyal.gtfs.error.GTFSError;
import com.conveyal.gtfs.error.GeneralError;
import com.conveyal.gtfs.model.Stop;
import com.conveyal.gtfs.validator.PostLoadValidator;
Expand Down Expand Up @@ -81,6 +80,7 @@ public BundleController (BackendComponents components) {
public void registerEndpoints (Service sparkService) {
sparkService.path("/api/bundle", () -> {
sparkService.get("", this::getBundles, toJson);
sparkService.get("/:_id/config", this::getBundleConfig, toJson);
sparkService.get("/:_id", this::getBundle, toJson);
sparkService.post("", this::create, toJson);
sparkService.put("/:_id", this::update, toJson);
Expand Down Expand Up @@ -110,15 +110,13 @@ private Bundle create (Request req, Response res) {
try {
bundle.name = files.get("bundleName").get(0).getString("UTF-8");
bundle.regionId = files.get("regionId").get(0).getString("UTF-8");

if (files.get("osmId") != null) {
bundle.osmId = files.get("osmId").get(0).getString("UTF-8");
Bundle bundleWithOsm = Persistence.bundles.find(QueryBuilder.start("osmId").is(bundle.osmId).get()).next();
if (bundleWithOsm == null) {
throw AnalysisServerException.badRequest("Selected OSM does not exist.");
}
}

if (files.get("feedGroupId") != null) {
bundle.feedGroupId = files.get("feedGroupId").get(0).getString("UTF-8");
Bundle bundleWithFeed = Persistence.bundles.find(QueryBuilder.start("feedGroupId").is(bundle.feedGroupId).get()).next();
Expand All @@ -135,6 +133,13 @@ private Bundle create (Request req, Response res) {
bundle.feedsComplete = bundleWithFeed.feedsComplete;
bundle.totalFeeds = bundleWithFeed.totalFeeds;
}
if (files.get("config") != null) {
// Validation by deserializing into a model class instance. Unknown fields are ignored to
// allow sending config to custom or experimental workers with features unknown to the backend.
// The fields specifying OSM and GTFS IDs are not expected here. They will be ignored and overwritten.
String configString = files.get("config").get(0).getString();
bundle.config = JsonUtil.objectMapper.readValue(configString, TransportNetworkConfig.class);
}
UserPermissions userPermissions = UserPermissions.from(req);
bundle.accessGroup = userPermissions.accessGroup;
bundle.createdBy = userPermissions.email;
Expand Down Expand Up @@ -274,15 +279,19 @@ private Bundle create (Request req, Response res) {
return bundle;
}

/** SIDE EFFECTS: This method will change the field bundle.config before writing it. */
private void writeNetworkConfigToCache (Bundle bundle) throws IOException {
TransportNetworkConfig networkConfig = new TransportNetworkConfig();
networkConfig.osmId = bundle.osmId;
networkConfig.gtfsIds = bundle.feeds.stream().map(f -> f.bundleScopedFeedId).collect(Collectors.toList());

// If the user specified additional network configuration options, they should already be in bundle.config.
// If no custom options were specified, we start with a fresh, empty instance.
if (bundle.config == null) {
bundle.config = new TransportNetworkConfig();
}
// This will overwrite and override any inconsistent osm and gtfs IDs that were mistakenly supplied by the user.
bundle.config.osmId = bundle.osmId;
bundle.config.gtfsIds = bundle.feeds.stream().map(f -> f.bundleScopedFeedId).collect(Collectors.toList());
String configFileName = bundle._id + ".json";
File configFile = FileUtils.createScratchFile("json");
JsonUtil.objectMapper.writeValue(configFile, networkConfig);

JsonUtil.objectMapper.writeValue(configFile, bundle.config);
FileStorageKey key = new FileStorageKey(BUNDLES, configFileName);
fileStorage.moveIntoStorage(key, configFile);
}
Expand Down Expand Up @@ -312,6 +321,27 @@ private Bundle getBundle (Request req, Response res) {
return bundle;
}

/**
* There are two copies of the Bundle/Network config: one in the Bundle entry in the database and one in a JSON
* file (obtainable by the workers). This method always reads the one in the file, which has been around longer
* and is considered the definitive source of truth. The entry in the database is a newer addition and has only
* been around since September 2024.
*/
private TransportNetworkConfig getBundleConfig (Request request, Response res) {
// Unfortunately this mimics logic in TransportNetworkCache. Deduplicate in a static utility method?
String id = GTFSCache.cleanId(request.params("_id"));
FileStorageKey key = new FileStorageKey(BUNDLES, id, "json");
File networkConfigFile = fileStorage.getFile(key);
// Unlike in the worker, we expect the backend to have a model field for every known network/bundle option.
// Threfore, use the default objectMapper that does not tolerate unknown fields.
try {
return JsonUtil.objectMapper.readValue(networkConfigFile, TransportNetworkConfig.class);
} catch (Exception exception) {
LOG.error("Exception deserializing stored network config", exception);
return null;
}
}

private Collection<Bundle> getBundles (Request req, Response res) {
return Persistence.bundles.findPermittedForQuery(req);
}
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/com/conveyal/analysis/models/Bundle.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.conveyal.gtfs.error.GTFSError;
import com.conveyal.gtfs.model.FeedInfo;
import com.conveyal.gtfs.validator.model.Priority;
import com.conveyal.r5.analyst.cluster.TransportNetworkConfig;
import com.fasterxml.jackson.annotation.JsonIgnore;

import java.time.LocalDate;
Expand Down Expand Up @@ -47,6 +48,11 @@ public class Bundle extends Model implements Cloneable {
public int feedsComplete;
public int totalFeeds;

// The definitive TransportNetworkConfig is a JSON file stored alongside the feeds in file storage. It is
// duplicated here to record any additional user-specified options that were supplied when the bundle was created.
// It may contain redundant copies of information stored in the outer level Bundle such as OSM and GTFS feed IDs.
public TransportNetworkConfig config;

public static String bundleScopeFeedId (String feedId, String feedGroupId) {
return String.format("%s_%s", feedId, feedGroupId);
}
Expand Down
4 changes: 3 additions & 1 deletion src/main/java/com/conveyal/r5/analyst/NetworkPreloader.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,13 @@ protected TransportNetwork buildValue(Key key) {

// Get the set of points to which we are measuring travel time. Any smaller sub-grids created here will
// reference the scenarioNetwork's built-in full-extent pointset, so can reuse its linkage.
// TODO handle multiple destination grids.
// FIXME handle multiple destination grids.

if (key.destinationGridExtents == null) {
// Special (and ideally temporary) case for regional freeform destinations, where there is no grid to link.
// The null destinationGridExtents are created by the WebMercatorExtents#forPointsets else clause.
// FIXME there is no grid to link, but there are points and egress tables to make!
// see com.conveyal.r5.analyst.cluster.AnalysisWorkerTask.loadAndValidateDestinationPointSets
return scenarioNetwork;
}

Expand Down
14 changes: 7 additions & 7 deletions src/main/java/com/conveyal/r5/analyst/cluster/ScenarioCache.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@
* scenarios from the backend instead of from S3.
*
* TODO merge this with TransportNetworkCache#resolveScenario into a single multi-level mem/disk/s3 cache.
* Note that this cache is going to just grow indefinitely in size as a worker receives many iterations of the same
* scenario - that could be a memory leak. Again multi level caching could releive those worries.
* It's debatable whether we should be hanging on to scenarios passed with single point requests becuase they may never
* be used again.
* This cache grows in size without bound as a worker receives many iterations of the same scenario.
* This is technically a sort of memory leak for long-lived workers. Multi-level caching could relieve those worries.
* However, this cache stores only the Scenarios and Modifications, not any large egress tables or linkages.
*
* It's debatable whether we should be hanging on to scenarios passed with single point requests,
* because they may never be used again.
* Should we just always require a single point task to be sent to the cluster before a regional?
* That would not ensure the scenario was present on all workers though.
*
* Created by abyrd on 2018-10-29
*/
public class ScenarioCache {

Expand All @@ -44,7 +44,7 @@ public class ScenarioCache {
public synchronized void storeScenario (Scenario scenario) {
Scenario existingScenario = scenariosById.put(scenario.id, scenario);
if (existingScenario != null) {
LOG.debug("Scenario cache already contained a this scenario.");
LOG.debug("Scenario cache already contained this scenario.");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ public class WorkerStatus {
public String workerVersion;
public String workerId;
public Set<String> networks = new HashSet<>();
public Set<String> scenarios = new HashSet<>();
public double secondsSinceLastPoll;
public Map<String, Integer> tasksPerMinuteByJobId;
@JsonUnwrapped(prefix = "ec2")
Expand Down Expand Up @@ -86,7 +85,6 @@ public WorkerStatus (AnalysisWorker worker) {
// networks = worker.networkPreloader.transportNetworkCache.getLoadedNetworkIds();
// For now we report a single network, even before it's loaded.
networks = Sets.newHashSet(worker.networkId);
scenarios = worker.networkPreloader.transportNetworkCache.getAppliedScenarios();
ec2 = worker.ec2info;

OperatingSystemMXBean operatingSystemMXBean = ManagementFactory.getOperatingSystemMXBean();
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/com/conveyal/r5/streets/StreetRouter.java
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,7 @@ public int getTravelTimeToVertex (int vertexIndex) {
* fragments from the vertices at either end of the edge up to the destination split point.
* If no states can be produced return null.
*
* Note that this is only used by the point to point street router, not by LinkedPointSets (which have equivalent
* NOTE that this is ONLY USED BY the point to point street router, NOT BY LinkedPointSets (which have equivalent
* logic in their eval method). The PointSet implementation only needs to produce times, not States. But ideally
* some common logic can be factored out.
*/
Expand Down
10 changes: 3 additions & 7 deletions src/main/java/com/conveyal/r5/transit/TransportNetwork.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,10 @@ public class TransportNetwork implements Serializable {
public TransitLayer transitLayer;

/**
* This stores any number of lightweight scenario networks built upon the current base network.
* FIXME that sounds like a memory leak, should be a WeighingCache or at least size-limited.
* A single network cache at the top level could store base networks and scenarios since they all have globally
* unique IDs. A hierarchical cache does have the advantage of evicting all the scenarios with the associated
* base network, which keeps the references in the scenarios from holding on to the base network. But considering
* that we have never started evicting networks (other than for a "cache" of one element) this might be getting
* ahead of ourselves.
* This field is no longer used. It has been moved to TransportNetworkCache, but this one remains for now, to
* avoid any inadvertent incompatibilities with serialized network files or serialization library settings.
*/
@Deprecated
public transient Map<String, TransportNetwork> scenarios = new HashMap<>();

/**
Expand Down
Loading

0 comments on commit 16f2b03

Please sign in to comment.