diff --git a/examples/range_example.py b/examples/range_example.py
new file mode 100644
index 00000000..4110562e
--- /dev/null
+++ b/examples/range_example.py
@@ -0,0 +1,39 @@
+import sys
+import time
+
+import pandana
+
+import numpy as np
+import pandas as pd
+from pympler.asizeof import asizeof
+
+print()
+print("Loading data...")
+t0 = time.time()
+store = pd.HDFStore('examples/data/bayareanetwork.h5', 'r')
+nodes, edges = store.nodes, store.edges
+print(round(time.time()-t0, 1), ' sec.')
+
+print()
+print("Initializing network...")
+t0 = time.time()
+net = pandana.Network(nodes.x, nodes.y, edges.from_int, edges.to_int, edges[['weight']])
+store.close()
+print(round(time.time()-t0, 1), ' sec.')
+
+print()
+print("Calculating nodes in 100m range...")
+t0 = time.time()
+r = net.nodes_in_range([53114882, 53107159], 100.0)
+print(round(time.time()-t0, 1), ' sec.')
+
+# print(net.node_idx.values)
+# print(net.node_idx.index.values)
+
+print(asizeof(r))  # 88.8 million bytes raw
+
+print()
+
+# dataframe.info()
+# dataframe.memory_usage(deep=True)
+# .set_index(['1','2'], inplace=True)
\ No newline at end of file
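For orientation before the library changes below: `nodes_in_range` returns a long-format `pandas.DataFrame` with one row per (source, reachable node) pair, following the `Network.nodes_in_range` docstring later in this diff. A minimal sketch of inspecting the result — the destination IDs and distances here are invented for illustration, not actual output:

```python
# 'weight' is the impedance column name passed to the Network constructor above.
r = net.nodes_in_range([53114882, 53107159], 100.0)
print(r.head(3))
#      source  destination  weight
# 0  53114882     53114882     0.0
# 1  53114882     53114880    35.2   <- hypothetical values
# 2  53114882     53114879    71.9
assert r.weight.max() <= 100.0  # results are filtered to the query radius
```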
pd.DataFrame({"node_idx": self.node_idx}), - left_on="node_ids", - right_index=True, - how="left") + df = pd.merge( + pd.DataFrame({"node_ids": node_ids}), + pd.DataFrame({"node_idx": self.node_idx}), + left_on="node_ids", + right_index=True, + how="left", + ) return df.node_idx @property @@ -164,8 +167,12 @@ def bbox(self): """ The bounding box for nodes in this network [xmin, ymin, xmax, ymax] """ - return [self.nodes_df.x.min(), self.nodes_df.y.min(), - self.nodes_df.x.max(), self.nodes_df.y.max()] + return [ + self.nodes_df.x.min(), + self.nodes_df.y.min(), + self.nodes_df.x.max(), + self.nodes_df.y.max(), + ] def shortest_path(self, node_a, node_b, imp_name=None): """ @@ -224,8 +231,11 @@ def shortest_paths(self, nodes_a, nodes_b, imp_name=None): """ if len(nodes_a) != len(nodes_b): - raise ValueError("Origin and destination counts don't match: {}, {}" - .format(len(nodes_a), len(nodes_b))) + raise ValueError( + "Origin and destination counts don't match: {}, {}".format( + len(nodes_a), len(nodes_b) + ) + ) # map to internal node indexes nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values @@ -298,8 +308,11 @@ def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None): """ if len(nodes_a) != len(nodes_b): - raise ValueError("Origin and destination counts don't match: {}, {}" - .format(len(nodes_a), len(nodes_b))) + raise ValueError( + "Origin and destination counts don't match: {}, {}".format( + len(nodes_a), len(nodes_b) + ) + ) # map to internal node indexes nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values @@ -349,23 +362,23 @@ def set(self, node_ids, variable=None, name="tmp"): """ if variable is None: variable = pd.Series(np.ones(len(node_ids)), index=node_ids.index) - - df = pd.DataFrame({name: variable, - "node_idx": self._node_indexes(node_ids)}) + df = pd.DataFrame({name: variable, "node_idx": self._node_indexes(node_ids)}) length = len(df) df = df.dropna(how="any") newl = len(df) - if length-newl > 0: + if length - newl > 0: print( - "Removed %d rows because they contain missing values" % - (length-newl)) + "Removed %d rows because they contain missing values" % (length - newl) + ) self.variable_names.add(name) - self.net.initialize_access_var(name.encode('utf-8'), - df.node_idx.values.astype('int'), - df[name].values.astype('double')) + self.net.initialize_access_var( + name.encode("utf-8"), + df.node_idx.values.astype("int"), + df[name].values.astype("double"), + ) def precompute(self, distance): """ @@ -386,19 +399,67 @@ def precompute(self, distance): """ self.net.precompute_range(distance) + def nodes_in_range(self, nodes, radius, imp_name=None): + """ + Computes the range queries (the reachable nodes within this maximum + distance) for each input node. + + Parameters + ---------- + nodes : list-like of ints + Source node IDs + radius : float + Maximum distance to use. This will usually be a distance unit in + meters however if you have customized the impedance (using the + imp_name option) this could be in other units such as utility or + time etc. + imp_name : string, optional + The impedance name to use for the aggregation on this network. + Must be one of the impedance names passed in the constructor of + this object. If not specified, there must be only one impedance + passed in the constructor, which will be used. 
 
     def _imp_name_to_num(self, imp_name):
         if imp_name is None:
-            assert len(self.impedance_names) == 1,\
-                "must pass impedance name if there are multiple impedances set"
+            assert (
+                len(self.impedance_names) == 1
+            ), "must pass impedance name if there are multiple impedances set"
             imp_name = self.impedance_names[0]
 
-        assert imp_name in self.impedance_names, "An impedance with that name" \
-            "was not found"
+        assert imp_name in self.impedance_names, (
+            "An impedance with that name " "was not found"
+        )
 
         return self.impedance_names.index(imp_name)
 
-    def aggregate(self, distance, type="sum", decay="linear", imp_name=None,
-                  name="tmp"):
+    def aggregate(
+        self, distance, type="sum", decay="linear", imp_name=None, name="tmp"
+    ):
         """
         Aggregate information for every source node in the network - this is
         really the main purpose of this library. This allows you to touch
@@ -457,23 +518,26 @@ def aggregate(self, distance, type="sum", decay="linear", imp_name=None,
         type = type.lower()
 
         # Resolve aliases
-        if type in ['ave', 'avg', 'average']:
-            type = 'mean'
+        if type in ["ave", "avg", "average"]:
+            type = "mean"
 
-        if type in ['stddev']:
-            type = 'std'
+        if type in ["stddev"]:
+            type = "std"
 
-        if type in ['med']:
-            type = 'median'
+        if type in ["med"]:
+            type = "median"
 
-        assert name in self.variable_names, "A variable with that name " \
-            "has not yet been initialized"
+        assert name in self.variable_names, (
+            "A variable with that name " "has not yet been initialized"
+        )
 
-        res = self.net.get_all_aggregate_accessibility_variables(distance,
-                                                                 name.encode('utf-8'),
-                                                                 type.encode('utf-8'),
-                                                                 decay.encode('utf-8'),
-                                                                 imp_num)
+        res = self.net.get_all_aggregate_accessibility_variables(
+            distance,
+            name.encode("utf-8"),
+            type.encode("utf-8"),
+            decay.encode("utf-8"),
+            imp_num,
+        )
 
         return pd.Series(res, index=self.node_ids)
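Given the alias handling above, the following calls should be interchangeable once a variable named "foo" has been registered with `net.set(..., name="foo")` — a sketch, not a snippet from the test suite:

```python
s1 = net.aggregate(500, type="ave", decay="linear", name="foo")   # alias
s2 = net.aggregate(500, type="mean", decay="linear", name="foo")  # canonical
assert s1.equals(s2)  # "ave" is rewritten to "mean" before reaching C++
```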
""" - xys = pd.DataFrame({'x': x_col, 'y': y_col}) + xys = pd.DataFrame({"x": x_col, "y": y_col}) distances, indexes = self.kdtree.query(xys.values) indexes = np.transpose(indexes)[0] @@ -516,16 +580,22 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): node_ids = self.nodes_df.iloc[indexes].index - df = pd.DataFrame({"node_id": node_ids, "distance": distances}, - index=xys.index) + df = pd.DataFrame({"node_id": node_ids, "distance": distances}, index=xys.index) if mapping_distance is not None: df = df[df.distance <= mapping_distance] return df.node_id - def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, - plot_kwargs=None, cbar_kwargs=None): + def plot( + self, + data, + bbox=None, + plot_type="scatter", + fig_kwargs=None, + plot_kwargs=None, + cbar_kwargs=None, + ): """ Plot an array of data on a map using Matplotlib, automatically matching the data to the Pandana network node positions. Keyword arguments are @@ -570,8 +640,8 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, except (ModuleNotFoundError, RuntimeError): raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib") - fig_kwargs = fig_kwargs or {'figsize': (10, 8)} - plot_kwargs = plot_kwargs or {'cmap': 'hot_r', 's': 1} + fig_kwargs = fig_kwargs or {"figsize": (10, 8)} + plot_kwargs = plot_kwargs or {"cmap": "hot_r", "s": 1} cbar_kwargs = cbar_kwargs or {} if not bbox: @@ -579,18 +649,17 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, self.nodes_df.y.min(), self.nodes_df.x.min(), self.nodes_df.y.max(), - self.nodes_df.x.max()) + self.nodes_df.x.max(), + ) fig, ax = plt.subplots(**fig_kwargs) x, y = (self.nodes_df.x.values, self.nodes_df.y.values) - if plot_type == 'scatter': - plot = plt.scatter( - x, y, c=data.values, **plot_kwargs) - elif plot_type == 'hexbin': - plot = plt.hexbin( - x, y, C=data.values, **plot_kwargs) + if plot_type == "scatter": + plot = plt.scatter(x, y, c=data.values, **plot_kwargs) + elif plot_type == "hexbin": + plot = plt.hexbin(x, y, C=data.values, **plot_kwargs) colorbar = plt.colorbar(plot, **cbar_kwargs) @@ -623,11 +692,13 @@ def init_pois(self, num_categories, max_dist, max_pois): warnings.warn( "Method init_pois() is no longer needed in Pandana 0.4+ and will be removed in a \ future version; maxdist and maxitems should now be passed to set_pois()", - DeprecationWarning + DeprecationWarning, ) return None - def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None): + def set_pois( + self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None + ): """ Set the location of all the points of interest (POIs) of this category. 
@@ -623,11 +692,13 @@ def init_pois(self, num_categories, max_dist, max_pois):
         warnings.warn(
             "Method init_pois() is no longer needed in Pandana 0.4+ and will be removed in a \
 future version; maxdist and maxitems should now be passed to set_pois()",
-            DeprecationWarning
+            DeprecationWarning,
         )
         return None
 
-    def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None):
+    def set_pois(
+        self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None
+    ):
         """
         Set the location of all the points of interest (POIs) of this category.
         The POIs are connected to the closest node in the Pandana network
@@ -656,7 +727,7 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col
         """
         # condition to check if missing arguments for keyword arguments using set_pois() from v0.3
         if maxitems is None:
-            print('Reading parameters from init_pois()')
+            print("Reading parameters from init_pois()")
             maxitems = self.max_pois
 
         # condition to check for positional arguments in set_pois() from v0.3
@@ -665,7 +736,7 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col
             maxitems = self.max_pois
 
         if maxdist is None:
-            print('Reading parameters from init_pois()')
+            print("Reading parameters from init_pois()")
             maxdist = self.max_dist
 
         elif isinstance(maxdist, type(pd.Series())):
             x_col = maxdist
             maxdist = self.max_dist
@@ -683,10 +754,19 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col
 
         node_idx = self._node_indexes(node_ids)
 
-        self.net.initialize_category(maxdist, maxitems, category.encode('utf-8'), node_idx.values)
+        self.net.initialize_category(
+            maxdist, maxitems, category.encode("utf-8"), node_idx.values
+        )
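A usage sketch for the keyword form of `set_pois` together with `nearest_pois`, mirroring the pattern in the test suite — `poi_x` and `poi_y` are hypothetical coordinate Series:

```python
net.set_pois("restaurants", maxdist=2000, maxitems=10,
             x_col=poi_x, y_col=poi_y)
d = net.nearest_pois(2000, "restaurants", num_pois=3, include_poi_ids=True)
# Columns 1..3 hold distances to the nearest POIs; with include_poi_ids=True,
# columns poi1..poi3 hold the original POI index labels.
```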
 
-    def nearest_pois(self, distance, category, num_pois=1, max_distance=None,
-                     imp_name=None, include_poi_ids=False):
+    def nearest_pois(
+        self,
+        distance,
+        category,
+        num_pois=1,
+        max_distance=None,
+        imp_name=None,
+        include_poi_ids=False,
+    ):
         """
         Find the distance to the nearest points of interest (POIs) from each
         source node. The bigger values in this case mean less accessibility.
@@ -745,18 +825,16 @@ def nearest_pois(self, distance, category, num_pois=1, max_distance=None,
         imp_num = self._imp_name_to_num(imp_name)
 
         dists, poi_ids = self.net.find_all_nearest_pois(
-            distance,
-            num_pois,
-            category.encode('utf-8'),
-            imp_num)
+            distance, num_pois, category.encode("utf-8"), imp_num
+        )
 
         dists[dists == -1] = max_distance
 
         df = pd.DataFrame(dists, index=self.node_ids)
-        df.columns = list(range(1, num_pois+1))
+        df.columns = list(range(1, num_pois + 1))
 
         if include_poi_ids:
             df2 = pd.DataFrame(poi_ids, index=self.node_ids)
-            df2.columns = ["poi%d" % i for i in range(1, num_pois+1)]
+            df2.columns = ["poi%d" % i for i in range(1, num_pois + 1)]
             for col in df2.columns:
                 # if this is still all working according to plan at this point
                 # the great magic trick is now to turn the integer position of
                 # the poi, which is painstakingly returned from the c++ code,
                 # back into the actual poi_id that was passed in when it was
                 # initialized as a pandas.Series - this really is pandas-like
                 # thinking.  it's complicated on the inside, but quite
                 # intuitive to the user I think
-                s = df2[col].astype('int')
+                s = df2[col].astype("int")
                 df2[col] = self.poi_category_indexes[category].values[s]
                 df2.loc[s == -1, col] = np.nan
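The "magic trick" in the loop above is ordinary NumPy fancy indexing plus a mask for the -1 sentinel. The same move on toy data (all values invented):

```python
import numpy as np
import pandas as pd

poi_ids = np.array(["lab0", "lab1", "lab2"], dtype=object)  # ids as passed in
s = pd.Series([2, 0, -1])              # integer positions from the C++ layer

mapped = pd.Series(poi_ids[s.values])  # -1 silently wraps to the last id...
mapped[s == -1] = np.nan               # ...so it is masked out afterwards
print(mapped.tolist())                 # ['lab2', 'lab0', nan]
```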
@@ -802,10 +880,9 @@ def low_connectivity_nodes(self, impedance, count, imp_name=None):
         """
         # set a counter variable on all nodes
-        self.set(self.node_ids.to_series(), name='counter')
+        self.set(self.node_ids.to_series(), name="counter")
 
         # count nodes within impedance range
-        agg = self.aggregate(
-            impedance, type='count', imp_name=imp_name, name='counter')
+        agg = self.aggregate(impedance, type="count", imp_name=imp_name, name="counter")
 
         return np.array(agg[agg < count].index)
diff --git a/pandana/tests/test_pandana.py b/pandana/tests/test_pandana.py
index 1d64eb09..4e1593e1 100644
--- a/pandana/tests/test_pandana.py
+++ b/pandana/tests/test_pandana.py
@@ -12,17 +12,16 @@
 
 @pytest.fixture(scope="module")
 def sample_osm(request):
-    store = pd.HDFStore(
-        os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r")
+    store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r")
     nodes, edges = store.nodes, store.edges
 
-    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to,
-                       edges[["weight"]])
+    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]])
 
     net.precompute(2000)
 
     def fin():
         store.close()
+
     request.addfinalizer(fin)
 
     return net
@@ -31,16 +30,15 @@ def fin():
 # initialize a second network
 @pytest.fixture(scope="module")
 def second_sample_osm(request):
-    store = pd.HDFStore(
-        os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r")
+    store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r")
     nodes, edges = store.nodes, store.edges
 
-    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to,
-                       edges[["weight"]])
+    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]])
 
     net.precompute(2000)
 
     def fin():
         store.close()
+
     request.addfinalizer(fin)
 
     return net
@@ -91,48 +89,47 @@ def test_agg_variables_accuracy(sample_osm):
     assert s.iloc[0] == 50
 
     s = net.aggregate(100000, type="AVE").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.mean(), atol=1e-3)
 
     s = net.aggregate(100000, type="mean").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.mean(), atol=1e-3)
 
     s = net.aggregate(100000, type="min").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.min(), atol=1e-3)
 
     s = net.aggregate(100000, type="max").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.max(), atol=1e-3)
 
     r.sort_values(inplace=True)
 
     s = net.aggregate(100000, type="median").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.iloc[25], atol=1e-2)
 
     s = net.aggregate(100000, type="25pct").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.iloc[12], atol=1e-2)
 
     s = net.aggregate(100000, type="75pct").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
    assert_allclose(s.mean(), r.iloc[37], atol=1e-2)
 
     s = net.aggregate(100000, type="SUM").loc[connected_nodes]
-    assert s.describe()['std'] < .05  # assert almost equal
+    assert s.describe()["std"] < 0.05  # assert almost equal
     assert_allclose(s.mean(), r.sum(), atol=1e-2)
 
     s = net.aggregate(100000, type="std").loc[connected_nodes]
-    assert s.describe()['std'] < .01  # assert almost equal
+    assert s.describe()["std"] < 0.01  # assert almost equal
     assert_allclose(s.mean(), r.std(), atol=1e-2)
 
 
 def test_non_integer_nodeids(request):
-    store = pd.HDFStore(
-        os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r")
+    store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r")
     nodes, edges = store.nodes, store.edges
 
     # convert to string!
@@ -140,11 +137,11 @@ def test_non_integer_nodeids(request):
     edges["from"] = edges["from"].astype("str")
     edges["to"] = edges["to"].astype("str")
 
-    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to,
-                       edges[["weight"]])
+    net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]])
 
     def fin():
         store.close()
+
     request.addfinalizer(fin)
 
 # test accuracy compared to Pandas functions
@@ -162,16 +159,15 @@ def test_agg_variables(sample_osm):
     net = sample_osm
 
     ssize = 50
-    net.set(random_node_ids(sample_osm, ssize),
-            variable=random_data(ssize))
+    net.set(random_node_ids(sample_osm, ssize), variable=random_data(ssize))
 
     for type in net.aggregations:
         for decay in net.decays:
             for distance in [5, 10, 20]:
-                t = type.decode(encoding='UTF-8')
-                d = decay.decode(encoding='UTF-8')
+                t = type.decode(encoding="UTF-8")
+                d = decay.decode(encoding="UTF-8")
                 s = net.aggregate(distance, type=t, decay=d)
-                assert s.describe()['std'] > 0
+                assert s.describe()["std"] > 0
 
     # testing w/o setting variable
     ssize = 50
@@ -180,30 +176,32 @@ def test_agg_variables(sample_osm):
     for type in net.aggregations:
         for decay in net.decays:
             for distance in [5, 10, 20]:
-                t = type.decode(encoding='UTF-8')
-                d = decay.decode(encoding='UTF-8')
+                t = type.decode(encoding="UTF-8")
+                d = decay.decode(encoding="UTF-8")
                 s = net.aggregate(distance, type=t, decay=d)
                 if t != "std":
-                    assert s.describe()['std'] > 0
+                    assert s.describe()["std"] > 0
                 else:
                     # no variance in data
-                    assert s.describe()['std'] == 0
+                    assert s.describe()["std"] == 0
 
 
 def test_non_float_node_values(sample_osm):
     net = sample_osm
 
     ssize = 50
-    net.set(random_node_ids(sample_osm, ssize),
-            variable=(random_data(ssize)*100).astype('int'))
+    net.set(
+        random_node_ids(sample_osm, ssize),
+        variable=(random_data(ssize) * 100).astype("int"),
+    )
 
     for type in net.aggregations:
         for decay in net.decays:
             for distance in [5, 10, 20]:
-                t = type.decode(encoding='UTF-8')
-                d = decay.decode(encoding='UTF-8')
+                t = type.decode(encoding="UTF-8")
+                d = decay.decode(encoding="UTF-8")
                 s = net.aggregate(distance, type=t, decay=d)
-                assert s.describe()['std'] > 0
+                assert s.describe()["std"] > 0
 
 
 def test_missing_nodeid(sample_osm):
@@ -238,13 +236,12 @@ def test_named_variable(sample_osm):
     net = sample_osm
 
     ssize = 50
-    net.set(random_node_ids(sample_osm, ssize),
-            variable=random_data(ssize), name="foo")
+    net.set(random_node_ids(sample_osm, ssize), variable=random_data(ssize), name="foo")
 
     net.aggregate(500, type="sum", decay="linear", name="foo")
 
 
-'''
+"""
 def test_plot(sample_osm):
     net = sample_osm
@@ -255,7 +252,7 @@ def test_plot(sample_osm):
 
     s = net.aggregate(500, type="sum", decay="linear")
 
     sample_osm.plot(s)
-'''
+"""
 
 
 def test_shortest_path(sample_osm):
@@ -274,8 +271,8 @@ def test_shortest_paths(sample_osm):
     vec_paths = sample_osm.shortest_paths(nodes[0:50], nodes[50:100])
 
     for i in range(50):
-        path = sample_osm.shortest_path(nodes[i], nodes[i+50])
-        assert(np.array_equal(vec_paths[i], path))
+        path = sample_osm.shortest_path(nodes[i], nodes[i + 50])
+        assert np.array_equal(vec_paths[i], path)
 
     # check mismatched OD lists
     try:
@@ -331,13 +328,12 @@ def test_pois(sample_osm):
     net = sample_osm
 
     x, y = random_x_y(sample_osm, 100)
-    x.index = ['lab%d' % i for i in range(len(x))]
+    x.index = ["lab%d" % i for i in range(len(x))]
     y.index = x.index
 
     net.set_pois("restaurants", 2000, 10, x, y)
 
-    d = net.nearest_pois(2000, "restaurants", num_pois=10,
-                         include_poi_ids=True)
+    d = net.nearest_pois(2000, "restaurants", num_pois=10, include_poi_ids=True)
 
 
 def test_pois2(second_sample_osm):
@@ -384,6 +380,7 @@ def test_pois_pandana3_pos_args(second_sample_osm):
 
     net2.nearest_pois(2000, "restaurants", num_pois=10)
 
+
 # test items are sorted
@@ -407,12 +404,14 @@ def test_repeat_pois(sample_osm):
     net = sample_osm
 
     def get_nearest_nodes(x, y, x2=None, y2=None, n=2):
-        coords_dict = [{'x': x, 'y': y, 'var': 1} for i in range(2)]
+        coords_dict = [{"x": x, "y": y, "var": 1} for i in range(2)]
         if x2 and y2:
-            coords_dict.append({'x': x2, 'y': y2, 'var': 1})
+            coords_dict.append({"x": x2, "y": y2, "var": 1})
         df = pd.DataFrame(coords_dict)
-        sample_osm.set_pois("restaurants", 2000, 10, df['x'], df['y'])
-        res = sample_osm.nearest_pois(2000, "restaurants", num_pois=5, include_poi_ids=True)
+        sample_osm.set_pois("restaurants", 2000, 10, df["x"], df["y"])
+        res = sample_osm.nearest_pois(
+            2000, "restaurants", num_pois=5, include_poi_ids=True
+        )
         return res
 
     # these are the min-max values of the network
@@ -426,8 +425,45 @@ def get_nearest_nodes(x, y, x2=None, y2=None, n=2):
     assert test1.equals(test3)
 
     test4 = get_nearest_nodes(-122.31, 47.60, -122.32, 47.61, n=3)
-    assert_allclose(test4.loc[53114882], [7, 13, 13, 2000, 2000, 2, 0, 1, np.nan, np.nan])
-    assert_allclose(test4.loc[53114880], [6, 14, 14, 2000, 2000, 2, 0, 1, np.nan, np.nan])
+    assert_allclose(
+        test4.loc[53114882], [7, 13, 13, 2000, 2000, 2, 0, 1, np.nan, np.nan]
+    )
+    assert_allclose(
+        test4.loc[53114880], [6, 14, 14, 2000, 2000, 2, 0, 1, np.nan, np.nan]
+    )
     assert_allclose(
         test4.loc[53227769],
-        [2000, 2000, 2000, 2000, 2000, np.nan, np.nan, np.nan, np.nan, np.nan])
+        [2000, 2000, 2000, 2000, 2000, np.nan, np.nan, np.nan, np.nan, np.nan],
+    )
+
+
+def test_nodes_in_range(sample_osm):
+    net = sample_osm
+
+    np.random.seed(0)
+    ssize = 10
+    x, y = random_x_y(net, 10)
+    snaps = net.get_node_ids(x, y)
+
+    test1 = net.nodes_in_range(snaps, 1)
+    net.precompute(10)
+    test5 = net.nodes_in_range(snaps, 5)
+    test11 = net.nodes_in_range(snaps, 11)
+    assert test1.weight.max() == 1
+    assert test5.weight.max() == 5
+    assert test11.weight.max() == 11
+
+    focus_id = snaps[0]
+    all_distances = net.shortest_path_lengths(
+        [focus_id] * len(net.node_ids), net.node_ids
+    )
+    all_distances = np.asarray(all_distances)
+    assert (all_distances <= 1).sum() == len(
+        test1.query("source == {}".format(focus_id))
+    )
+    assert (all_distances <= 5).sum() == len(
+        test5.query("source == {}".format(focus_id))
+    )
+    assert (all_distances <= 11).sum() == len(
+        test11.query("source == {}".format(focus_id))
+    )
diff --git a/setup.py b/setup.py
index 35d6d506..7d0b4104 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,8 @@
 import sys
 import sysconfig
 
-from setuptools import find_packages, setup, Extension
+from setuptools import find_packages
+from distutils.core import setup, Extension
 from setuptools.command.test import test as TestCommand
 from setuptools.command.build_ext import build_ext
 
@@ -12,8 +13,9 @@
 ## Invoking tests
 ###############################################
 
+
 class PyTest(TestCommand):
-    user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")]
+    user_options = [("pytest-args=", "a", "Arguments to pass to py.test")]
 
     def initialize_options(self):
         TestCommand.initialize_options(self)
@@ -27,21 +29,26 @@ def finalize_options(self):
     def run_tests(self):
         # import here, cause outside the eggs aren't loaded
         import pytest
-        errno = pytest.main(self.pytest_args or [''])
+
+        errno = pytest.main(self.pytest_args or [""])
         sys.exit(errno)
 
 
 class Lint(TestCommand):
     def run(self):
-        os.system("cpplint --filter=-build/include_subdir,-legal/copyright,-runtime/references,-runtime/int src/accessibility.* src/graphalg.*")
+        os.system(
+            "cpplint --filter=-build/include_subdir,-legal/copyright,-runtime/references,-runtime/int src/accessibility.* src/graphalg.*"
+        )
         os.system("pycodestyle src/cyaccess.pyx")
         os.system("pycodestyle pandana")
 
 
 class CustomBuildExtCommand(build_ext):
     """build_ext command for use when numpy headers are needed."""
+
     def run(self):
         import numpy as np
+
         self.include_dirs.append(np.get_include())
         build_ext.run(self)
 
@@ -50,121 +57,131 @@ def run(self):
 ## Building the C++ extension
 ###############################################
 
-extra_compile_args = ['-w', '-std=c++11', '-O3']
+extra_compile_args = ["-w", "-std=c++11", "-O3"]
 extra_link_args = []
 
 # Mac compilation: flags are for the llvm compilers included with recent
 # versions of Xcode Command Line Tools, or newer versions installed separately
 
-if sys.platform.startswith('darwin'):  # Mac
+if sys.platform.startswith("darwin"):  # Mac
+
+    extra_compile_args += ["-stdlib=libc++"]
+    extra_link_args += ["-stdlib=libc++"]
 
-    extra_compile_args += ['-stdlib=libc++']
-    extra_link_args += ['-stdlib=libc++']
-
     # The default compiler that ships with Macs doesn't support OpenMP multi-
     # threading. We recommend using the Conda toolchain instead, but will also
     # try to detect if people are using another alternative like Homebrew.
 
-    if 'CC' in os.environ:
-        extra_compile_args += ['-fopenmp']
-        print('Attempting Pandana compilation with OpenMP multi-threading '
-              'support, with user-specified compiler:\n{}'.format(
-                  os.environ['CC']))
+    if "CC" in os.environ:
+        extra_compile_args += ["-fopenmp"]
+        print(
+            "Attempting Pandana compilation with OpenMP multi-threading "
+            "support, with user-specified compiler:\n{}".format(os.environ["CC"])
+        )
 
     # Otherwise, if the default clang has been replaced but nothing specified
     # in the 'CC' environment variable, assume they've followed our instructions
     # for using the Conda toolchain.
-
-    elif os.popen('which clang').read().strip() != '/usr/bin/clang':
-        cc = 'clang'
-        cc_catalina = 'clang --sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk'
-
-        extra_compile_args += ['-fopenmp']
-        print('Attempting Pandana compilation with OpenMP multi-threading '
-              'support, with the following compiler:\n{}'.format(
-                  os.popen('which clang').read()))
-
-        if '10.15' in os.popen('sw_vers').read():
-            os.environ['CC'] = cc_catalina
-        elif '11.' in os.popen('sw_vers').read():
-            os.environ['CC'] = cc_catalina
+
+    elif os.popen("which clang").read().strip() != "/usr/bin/clang":
+        cc = "clang"
+        cc_catalina = (
+            "clang --sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk"
+        )
+
+        extra_compile_args += ["-fopenmp"]
+        print(
+            "Attempting Pandana compilation with OpenMP multi-threading "
+            "support, with the following compiler:\n{}".format(
+                os.popen("which clang").read()
+            )
+        )
+
+        if "10.15" in os.popen("sw_vers").read():
+            os.environ["CC"] = cc_catalina
+        elif "11." in os.popen("sw_vers").read():
+            os.environ["CC"] = cc_catalina
         else:
-            os.environ['CC'] = cc
+            os.environ["CC"] = cc
 
     else:
-        print('Attempting Pandana compilation without support for '
-              'multi-threading. See installation instructions for alternative '
-              'options')
+        print(
+            "Attempting Pandana compilation without support for "
+            "multi-threading. See installation instructions for alternative "
+            "options"
+        )
 
 # Windows compilation: flags are for Visual C++
 
-elif sys.platform.startswith('win'):  # Windows
-    extra_compile_args = ['/w', '/openmp']
+elif sys.platform.startswith("win"):  # Windows
+    extra_compile_args = ["/w", "/openmp"]
 
 # Linux compilation: flags are for gcc 4.8 and later
 
 else:  # Linux
-    extra_compile_args += ['-fopenmp']
-    extra_link_args += ['-lgomp']
+    extra_compile_args += ["-fopenmp"]
+    extra_link_args += ["-lgomp"]
 
 
 cyaccess = Extension(
-    name='pandana.cyaccess',
-    sources=[
-        'src/accessibility.cpp',
-        'src/graphalg.cpp',
-        'src/cyaccess.pyx',
-        'src/contraction_hierarchies/src/libch.cpp'],
-    language='c++',
-    include_dirs=['.'],
-    extra_compile_args=extra_compile_args,
-    extra_link_args=extra_link_args)
+    name="pandana.cyaccess",
+    sources=[
+        "src/accessibility.cpp",
+        "src/graphalg.cpp",
+        "src/cyaccess.pyx",
+        "src/contraction_hierarchies/src/libch.cpp",
+    ],
+    language="c++",
+    include_dirs=["."],
+    extra_compile_args=extra_compile_args,
+    extra_link_args=extra_link_args,
+)
 
 ###############################################
 ## Standard setup
 ###############################################
 
-version = '0.6.1'
+version = "0.7.dev0"
 
 packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"])
 
 setup(
     packages=packages,
-    name='pandana',
-    author='UrbanSim Inc.',
+    name="pandana",
+    author="UrbanSim Inc.",
     version=version,
-    license='AGPL',
-    description=('Python library for network analysis'),
+    license="AGPL",
+    description=("Python library for network analysis"),
     long_description=(
-        'Pandana is a Python library for network analysis that uses '
-        'contraction hierarchies to calculate super-fast travel '
-        'accessibility metrics and shortest paths. The numerical '
-        'code is in C++.'),
-    url='https://udst.github.io/pandana/',
+        "Pandana is a Python library for network analysis that uses "
+        "contraction hierarchies to calculate super-fast travel "
+        "accessibility metrics and shortest paths. The numerical "
+        "code is in C++."
+    ),
+    url="https://udst.github.io/pandana/",
     ext_modules=[cyaccess],
-    python_requires = '>=3.5',
     install_requires=[
-        'cython >=0.25.2',
-        'numpy >=1.8',
-        'pandas >=0.17',
-        'requests >=2.0',
-        'scikit-learn >=0.18',
+        "cython >=0.25.2",
+        "numpy >=1.8",
+        "pandas >=0.17",
+        "requests >=2.0",
+        "scikit-learn >=0.18",
         'tables >=3.1, <3.6; python_version <"3.6"',
-        'tables >=3.1, <3.7; python_version >="3.6"'
+        'tables >=3.1, <3.7; python_version >="3.6"',
     ],
     cmdclass={
-        'test': PyTest,
-        'lint': Lint,
-        'build_ext': CustomBuildExtCommand,
+        "test": PyTest,
+        "lint": Lint,
+        "build_ext": CustomBuildExtCommand,
     },
     classifiers=[
-        'Development Status :: 4 - Beta',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: 3.7',
-        'Programming Language :: Python :: 3.8',
-        'Programming Language :: Python :: 3.9',
-        'License :: OSI Approved :: GNU Affero General Public License v3'
+        "Development Status :: 4 - Beta",
+        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "License :: OSI Approved :: GNU Affero General Public License v3",
     ],
 )
diff --git a/src/accessibility.cpp b/src/accessibility.cpp
index 5c3ec2bd..5aa86b5a 100644
--- a/src/accessibility.cpp
+++ b/src/accessibility.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include <unordered_map>
 #include
 #include "graphalg.h"
@@ -81,6 +82,48 @@ Accessibility::precomputeRangeQueries(float radius) {
 }
 
 
+vector<vector<pair<long, float>>>
+Accessibility::Range(vector<long> srcnodes, float radius, int graphno,
+                     vector<long> ext_ids) {
+
+    // Set up a mapping between the external node ids and internal ones
+    std::unordered_map<long, int> int_ids(ext_ids.size());
+    for (int i = 0; i < ext_ids.size(); i++) {
+        int_ids.insert(pair<long, int>(ext_ids[i], i));
+    }
+
+    // use cached results if available
+    vector<DistanceVec> dists(srcnodes.size());
+    if (dmsradius > 0 && radius <= dmsradius) {
+        for (int i = 0; i < srcnodes.size(); i++) {
+            dists[i] = dms[graphno][int_ids[srcnodes[i]]];
+        }
+    }
+    else {
+        #pragma omp parallel
+        #pragma omp for schedule(guided)
+        for (int i = 0; i < srcnodes.size(); i++) {
+            ga[graphno]->Range(int_ids[srcnodes[i]], radius,
+                               omp_get_thread_num(), dists[i]);
+        }
+    }
+
+    // todo: check that results are returned from cache correctly
+    // todo: check that performing an aggregation creates cache
+
+    // Convert back to external node ids
+    vector<vector<pair<long, float>>> output(dists.size());
+    for (int i = 0; i < dists.size(); i++) {
+        output[i].resize(dists[i].size());
+        for (int j = 0; j < dists[i].size(); j++) {
+            output[i][j] = std::make_pair(ext_ids[dists[i][j].first],
+                                          dists[i][j].second);
+        }
+    }
+    return output;
+}
+
+
 vector<int>
 Accessibility::Route(int src, int tgt, int graphno) {
     vector<int> ret = this->ga[graphno]->Route(src, tgt);
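A note on the cached branch above: `Range()` reuses the distance matrices built by `precomputeRangeQueries` whenever the query radius fits inside the precomputed one (`dmsradius > 0 && radius <= dmsradius`), and otherwise falls back to fresh OpenMP range searches — though the todo comments indicate the cache path is still being verified. The intended Python-level pattern would be something like this sketch:

```python
net.precompute(2000)                                       # fills the range cache
near = net.nodes_in_range(list(net.node_ids[:100]), 500)   # can be served from cache
far = net.nodes_in_range(list(net.node_ids[:100]), 3000)   # recomputed fresh
```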
diff --git a/src/accessibility.h b/src/accessibility.h
index de76aab5..c5e0ad71 100644
--- a/src/accessibility.h
+++ b/src/accessibility.h
@@ -45,15 +45,16 @@ class Accessibility {
         string decay,
         int graphno = 0);
 
-    // get nodes with the range
-    DistanceVec Range(int srcnode, float radius, int graphno = 0);
+    // get nodes within range, for a specific list of source nodes
+    vector<vector<pair<long, float>>> Range(vector<long> srcnodes, float radius,
+                                            int graphno, vector<long> ext_ids);
 
     // shortest path between two points
     vector<int> Route(int src, int tgt, int graphno = 0);
 
     // shortest path between list of origins and destinations
     vector<vector<int>> Routes(vector<long> sources, vector<long> targets,
-        int graphno = 0);
+                               int graphno = 0);
 
     // shortest path distance between two points
     double Distance(int src, int tgt, int graphno = 0);
diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx
index a4f6c617..af637647 100644
--- a/src/cyaccess.pyx
+++ b/src/cyaccess.pyx
@@ -1,3 +1,5 @@
+#cython: language_level=3
+
 cimport cython
 from libcpp cimport bool
 from libcpp.vector cimport vector
@@ -27,6 +29,7 @@ cdef extern from "accessibility.h" namespace "MTC::accessibility":
         vector[vector[int]] Routes(vector[long], vector[long], int)
         double Distance(int, int, int)
         vector[double] Distances(vector[long], vector[long], int)
+        vector[vector[pair[long, float]]] Range(vector[long], float, int, vector[long])
         void precomputeRangeQueries(double)
@@ -191,6 +194,16 @@ cdef class cyaccess:
         impno - impedance id
         """
         return self.access.Distances(srcnodes, destnodes, impno)
-
+
     def precompute_range(self, double radius):
         self.access.precomputeRangeQueries(radius)
+
+    def nodes_in_range(self, vector[long] srcnodes, float radius, int impno,
+                       np.ndarray[long] ext_ids):
+        """
+        srcnodes - node ids of origins
+        radius - maximum range in which to search for nearby nodes
+        impno - the impedance id to use
+        ext_ids - all node ids in the network
+        """
+        return self.access.Range(srcnodes, radius, impno, ext_ids)