From 657406ab5721714edd77181fede0d320cba62149 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 4 Jan 2021 17:03:10 -0800 Subject: [PATCH 01/13] Versioning --- pandana/__init__.py | 2 +- setup.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandana/__init__.py b/pandana/__init__.py index 602e1d4c..41b80455 100644 --- a/pandana/__init__.py +++ b/pandana/__init__.py @@ -1,3 +1,3 @@ from .network import Network -version = __version__ = '0.6.1' +version = __version__ = '0.7.dev0' diff --git a/setup.py b/setup.py index 35d6d506..866669f4 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,8 @@ import sys import sysconfig -from setuptools import find_packages, setup, Extension +from setuptools import find_packages +from distutils.core import setup, Extension from setuptools.command.test import test as TestCommand from setuptools.command.build_ext import build_ext @@ -57,7 +58,7 @@ def run(self): # versions of Xcode Command Line Tools, or newer versions installed separately if sys.platform.startswith('darwin'): # Mac - + extra_compile_args += ['-stdlib=libc++'] extra_link_args += ['-stdlib=libc++'] @@ -125,7 +126,7 @@ def run(self): ## Standard setup ############################################### -version = '0.6.1' +version = '0.7.dev0' packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]) @@ -143,7 +144,6 @@ def run(self): 'code is in C++.'), url='https://udst.github.io/pandana/', ext_modules=[cyaccess], - python_requires = '>=3.5', install_requires=[ 'cython >=0.25.2', 'numpy >=1.8', @@ -160,11 +160,11 @@ def run(self): }, classifiers=[ 'Development Status :: 4 - Beta', + 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', 'License :: OSI Approved :: GNU Affero General Public License v3' ], ) From c50bdc16db646b63652a6aaac755b3844a435ae2 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Tue, 5 Jan 2021 14:45:24 -0800 Subject: [PATCH 02/13] Initial range query --- examples/range_example.py | 40 +++++++++++++++++++++++++++++++++++++++ pandana/network.py | 9 +++++++++ src/accessibility.cpp | 19 +++++++++++++++++++ src/accessibility.h | 2 +- src/cyaccess.pyx | 6 ++++++ 5 files changed, 75 insertions(+), 1 deletion(-) create mode 100644 examples/range_example.py diff --git a/examples/range_example.py b/examples/range_example.py new file mode 100644 index 00000000..ac85d23f --- /dev/null +++ b/examples/range_example.py @@ -0,0 +1,40 @@ +from __future__ import print_function + +import os.path +import sys +import time + +import numpy as np +import pandas as pd +import pandana.network as pdna + +if len(sys.argv) > 1: + # allow test file to be passed as an argument + storef = sys.argv[1] +else: + # if no argument provided look for it in the test data + storef = os.path.normpath(os.path.join( + os.path.dirname(os.path.abspath(__file__)), + '../pandana/tests/osm_sample.h5')) + +if not os.path.isfile(storef): + raise IOError('Could not find test input file: {!r}'.format(storef)) + +print('Building network from file: {!r}'.format(storef)) + +store = pd.HDFStore(storef, "r") +nodes, edges = store.nodes, store.edges +net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, + edges[["weight"]]) +store.close() +print() + +# Demonstrate "nodes in range" code - the largest connected subgraph here has 477 nodes, +# per the unit tests + +net.set(pd.Series(net.node_ids)) +s = net.aggregate(10000, type='count') +connected_nodes = s[s==477] + +print(net.nodes_in_range(1, 500)) + diff --git a/pandana/network.py b/pandana/network.py index 3694a779..ab24ab24 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -386,6 +386,15 @@ def precompute(self, distance): """ self.net.precompute_range(distance) + def nodes_in_range(self, node, radius, imp_name=None): + """ + """ + # NEED TO MAP TO INTERNAL NODE INDEX + + imp_num = self._imp_name_to_num(imp_name) + + return self.net.nodes_in_range(node, radius, imp_num) + def _imp_name_to_num(self, imp_name): if imp_name is None: assert len(self.impedance_names) == 1,\ diff --git a/src/accessibility.cpp b/src/accessibility.cpp index 5c3ec2bd..cdb2ee54 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -81,6 +81,25 @@ Accessibility::precomputeRangeQueries(float radius) { } +vector> +Accessibility::Range(int srcnode, float radius, int graphno) { + DistanceVec tmp; + DistanceVec &distances = tmp; + + // use cached results if available + if (dmsradius > 0 && radius <= dmsradius) { + distances = dms[graphno][srcnode]; + } else { + ga[graphno]->Range( + srcnode, + radius, + omp_get_thread_num(), + tmp); + } + return vector> (tmp.begin(), tmp.end()); +} + + vector Accessibility::Route(int src, int tgt, int graphno) { vector ret = this->ga[graphno]->Route(src, tgt); diff --git a/src/accessibility.h b/src/accessibility.h index de76aab5..a306872a 100644 --- a/src/accessibility.h +++ b/src/accessibility.h @@ -46,7 +46,7 @@ class Accessibility { int graphno = 0); // get nodes with the range - DistanceVec Range(int srcnode, float radius, int graphno = 0); + vector> Range(int srcnode, float radius, int graphno = 0); // shortest path between two points vector Route(int src, int tgt, int graphno = 0); diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index a4f6c617..8b2a63be 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -27,6 +27,7 @@ cdef extern from "accessibility.h" namespace "MTC::accessibility": vector[vector[int]] Routes(vector[long], vector[long], int) double Distance(int, int, int) vector[double] Distances(vector[long], vector[long], int) + vector[pair[int, float]] Range(int, float, int) void precomputeRangeQueries(double) @@ -192,5 +193,10 @@ cdef class cyaccess: """ return self.access.Distances(srcnodes, destnodes, impno) + def nodes_in_range(self, int srcnode, float radius, int impno=0): + """ + """ + return self.access.Range(srcnode, radius, impno) + def precompute_range(self, double radius): self.access.precomputeRangeQueries(radius) From abbe12086ac080fb05fa863123549e2cdcaebe42 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Thu, 7 Jan 2021 11:31:01 -0800 Subject: [PATCH 03/13] Cython language_level --- pandana/network.py | 6 +++--- src/cyaccess.pyx | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandana/network.py b/pandana/network.py index ab24ab24..0e3ce7c2 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -390,11 +390,11 @@ def nodes_in_range(self, node, radius, imp_name=None): """ """ # NEED TO MAP TO INTERNAL NODE INDEX - + imp_num = self._imp_name_to_num(imp_name) - + return self.net.nodes_in_range(node, radius, imp_num) - + def _imp_name_to_num(self, imp_name): if imp_name is None: assert len(self.impedance_names) == 1,\ diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index 8b2a63be..ee29e87b 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -1,3 +1,5 @@ +#cython: language_level=3 + cimport cython from libcpp cimport bool from libcpp.vector cimport vector From bd76d5bf66ea35c8cf688daee1b4362059fbc905 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Thu, 7 Jan 2021 14:50:17 -0800 Subject: [PATCH 04/13] Node id conversion in c++ --- examples/range_example.py | 10 +++++----- pandana/network.py | 5 ++--- src/accessibility.cpp | 27 ++++++++++++++++++++++----- src/accessibility.h | 5 +++-- src/cyaccess.pyx | 13 +++++++------ 5 files changed, 39 insertions(+), 21 deletions(-) diff --git a/examples/range_example.py b/examples/range_example.py index ac85d23f..6de0e972 100644 --- a/examples/range_example.py +++ b/examples/range_example.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import os.path import sys import time @@ -33,8 +31,10 @@ # per the unit tests net.set(pd.Series(net.node_ids)) -s = net.aggregate(10000, type='count') -connected_nodes = s[s==477] +#s = net.aggregate(10000, type='count') +#connected_nodes = s[s==477] -print(net.nodes_in_range(1, 500)) +print(net.nodes_in_range(53114882, 5.0)) +print(net.node_idx.values) +print(net.node_idx.index.values) diff --git a/pandana/network.py b/pandana/network.py index 0e3ce7c2..42881b0d 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -389,11 +389,10 @@ def precompute(self, distance): def nodes_in_range(self, node, radius, imp_name=None): """ """ - # NEED TO MAP TO INTERNAL NODE INDEX - imp_num = self._imp_name_to_num(imp_name) + ext_node_ids = self.node_idx.index.values - return self.net.nodes_in_range(node, radius, imp_num) + return self.net.nodes_in_range(node, radius, imp_num, ext_node_ids) def _imp_name_to_num(self, imp_name): if imp_name is None: diff --git a/src/accessibility.cpp b/src/accessibility.cpp index cdb2ee54..99b2891b 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include "graphalg.h" @@ -81,22 +82,38 @@ Accessibility::precomputeRangeQueries(float radius) { } -vector> -Accessibility::Range(int srcnode, float radius, int graphno) { +vector> +Accessibility::Range(int srcnode, float radius, int graphno, vector ext_node_ids) { + + // Set up a mapping between the external node ids and internal ones + std::unordered_map int_ids(ext_node_ids.size()); + for (int i = 0; i < ext_node_ids.size(); i++) { + int_ids.insert(pair(ext_node_ids[i], i)); + } + DistanceVec tmp; DistanceVec &distances = tmp; // use cached results if available if (dmsradius > 0 && radius <= dmsradius) { - distances = dms[graphno][srcnode]; + distances = dms[graphno][int_ids[srcnode]]; } else { ga[graphno]->Range( - srcnode, + int_ids[srcnode], radius, omp_get_thread_num(), tmp); } - return vector> (tmp.begin(), tmp.end()); + + // todo: check that results are returned from cache correctly + // todo: check that performing an aggregation creates cache + + // Convert back to external node ids + vector> output(distances.size()); + for (int i = 0; i < distances.size(); i++) { + output[i] = std::make_pair(ext_node_ids[distances[i].first], distances[i].second); + } + return output; } diff --git a/src/accessibility.h b/src/accessibility.h index a306872a..c5d346fa 100644 --- a/src/accessibility.h +++ b/src/accessibility.h @@ -46,14 +46,15 @@ class Accessibility { int graphno = 0); // get nodes with the range - vector> Range(int srcnode, float radius, int graphno = 0); + vector> Range(int srcnode, float radius, int graphno, + vector ext_node_ids); // shortest path between two points vector Route(int src, int tgt, int graphno = 0); // shortest path between list of origins and destinations vector> Routes(vector sources, vector targets, - int graphno = 0); + int graphno = 0); // shortest path distance between two points double Distance(int src, int tgt, int graphno = 0); diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index ee29e87b..94f4223d 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -29,7 +29,7 @@ cdef extern from "accessibility.h" namespace "MTC::accessibility": vector[vector[int]] Routes(vector[long], vector[long], int) double Distance(int, int, int) vector[double] Distances(vector[long], vector[long], int) - vector[pair[int, float]] Range(int, float, int) + vector[pair[long, float]] Range(int, float, int, vector[long]) void precomputeRangeQueries(double) @@ -194,11 +194,12 @@ cdef class cyaccess: impno - impedance id """ return self.access.Distances(srcnodes, destnodes, impno) - - def nodes_in_range(self, int srcnode, float radius, int impno=0): - """ - """ - return self.access.Range(srcnode, radius, impno) def precompute_range(self, double radius): self.access.precomputeRangeQueries(radius) + + def nodes_in_range(self, int srcnode, float radius, int impno, + np.ndarray[long] ext_node_ids): + """ + """ + return self.access.Range(srcnode, radius, impno, ext_node_ids) From 1d4687556e8456c905b4956ebf3df4aa413d79e1 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Thu, 7 Jan 2021 20:40:27 -0800 Subject: [PATCH 05/13] Multiple source nodes --- examples/range_example.py | 2 +- pandana/network.py | 6 +++--- src/accessibility.cpp | 43 +++++++++++++++++++++++---------------- src/accessibility.h | 6 +++--- src/cyaccess.pyx | 8 ++++---- 5 files changed, 36 insertions(+), 29 deletions(-) diff --git a/examples/range_example.py b/examples/range_example.py index 6de0e972..b558bf5f 100644 --- a/examples/range_example.py +++ b/examples/range_example.py @@ -34,7 +34,7 @@ #s = net.aggregate(10000, type='count') #connected_nodes = s[s==477] -print(net.nodes_in_range(53114882, 5.0)) +print(net.nodes_in_range([53114882, 53107159], 5.0)) print(net.node_idx.values) print(net.node_idx.index.values) diff --git a/pandana/network.py b/pandana/network.py index 42881b0d..0b90ebaf 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -386,13 +386,13 @@ def precompute(self, distance): """ self.net.precompute_range(distance) - def nodes_in_range(self, node, radius, imp_name=None): + def nodes_in_range(self, nodes, radius, imp_name=None): """ """ imp_num = self._imp_name_to_num(imp_name) - ext_node_ids = self.node_idx.index.values + ext_ids = self.node_idx.index.values - return self.net.nodes_in_range(node, radius, imp_num, ext_node_ids) + return self.net.nodes_in_range(nodes, radius, imp_num, ext_ids) def _imp_name_to_num(self, imp_name): if imp_name is None: diff --git a/src/accessibility.cpp b/src/accessibility.cpp index 99b2891b..5aa86b5a 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -82,36 +82,43 @@ Accessibility::precomputeRangeQueries(float radius) { } -vector> -Accessibility::Range(int srcnode, float radius, int graphno, vector ext_node_ids) { +vector>> +Accessibility::Range(vector srcnodes, float radius, int graphno, + vector ext_ids) { // Set up a mapping between the external node ids and internal ones - std::unordered_map int_ids(ext_node_ids.size()); - for (int i = 0; i < ext_node_ids.size(); i++) { - int_ids.insert(pair(ext_node_ids[i], i)); + std::unordered_map int_ids(ext_ids.size()); + for (int i = 0; i < ext_ids.size(); i++) { + int_ids.insert(pair(ext_ids[i], i)); } - - DistanceVec tmp; - DistanceVec &distances = tmp; // use cached results if available + vector dists(srcnodes.size()); if (dmsradius > 0 && radius <= dmsradius) { - distances = dms[graphno][int_ids[srcnode]]; - } else { - ga[graphno]->Range( - int_ids[srcnode], - radius, - omp_get_thread_num(), - tmp); + for (int i = 0; i < srcnodes.size(); i++) { + dists[i] = dms[graphno][int_ids[srcnodes[i]]]; + } + } + else { + #pragma omp parallel + #pragma omp for schedule(guided) + for (int i = 0; i < srcnodes.size(); i++) { + ga[graphno]->Range(int_ids[srcnodes[i]], radius, + omp_get_thread_num(), dists[i]); + } } // todo: check that results are returned from cache correctly // todo: check that performing an aggregation creates cache // Convert back to external node ids - vector> output(distances.size()); - for (int i = 0; i < distances.size(); i++) { - output[i] = std::make_pair(ext_node_ids[distances[i].first], distances[i].second); + vector>> output(dists.size()); + for (int i = 0; i < dists.size(); i++) { + output[i].resize(dists[i].size()); + for (int j = 0; j < dists[i].size(); j++) { + output[i][j] = std::make_pair(ext_ids[dists[i][j].first], + dists[i][j].second); + } } return output; } diff --git a/src/accessibility.h b/src/accessibility.h index c5d346fa..c5e0ad71 100644 --- a/src/accessibility.h +++ b/src/accessibility.h @@ -45,9 +45,9 @@ class Accessibility { string decay, int graphno = 0); - // get nodes with the range - vector> Range(int srcnode, float radius, int graphno, - vector ext_node_ids); + // get nodes with a range for a specific list of source nodes + vector>> Range(vector srcnodes, float radius, + int graphno, vector ext_ids); // shortest path between two points vector Route(int src, int tgt, int graphno = 0); diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index 94f4223d..1b7ede19 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -29,7 +29,7 @@ cdef extern from "accessibility.h" namespace "MTC::accessibility": vector[vector[int]] Routes(vector[long], vector[long], int) double Distance(int, int, int) vector[double] Distances(vector[long], vector[long], int) - vector[pair[long, float]] Range(int, float, int, vector[long]) + vector[vector[pair[long, float]]] Range(vector[long], float, int, vector[long]) void precomputeRangeQueries(double) @@ -198,8 +198,8 @@ cdef class cyaccess: def precompute_range(self, double radius): self.access.precomputeRangeQueries(radius) - def nodes_in_range(self, int srcnode, float radius, int impno, - np.ndarray[long] ext_node_ids): + def nodes_in_range(self, vector[long] srcnodes, float radius, int impno, + np.ndarray[long] ext_ids): """ """ - return self.access.Range(srcnode, radius, impno, ext_node_ids) + return self.access.Range(srcnodes, radius, impno, ext_ids) From 0b57562aa2d4cf5123070e8093e5f913a85e9534 Mon Sep 17 00:00:00 2001 From: Sam Maurer Date: Mon, 8 Feb 2021 17:03:36 -0800 Subject: [PATCH 06/13] Range example --- examples/range_example.py | 53 +++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/examples/range_example.py b/examples/range_example.py index b558bf5f..4110562e 100644 --- a/examples/range_example.py +++ b/examples/range_example.py @@ -1,40 +1,39 @@ -import os.path import sys import time +import pandana + import numpy as np import pandas as pd -import pandana.network as pdna - -if len(sys.argv) > 1: - # allow test file to be passed as an argument - storef = sys.argv[1] -else: - # if no argument provided look for it in the test data - storef = os.path.normpath(os.path.join( - os.path.dirname(os.path.abspath(__file__)), - '../pandana/tests/osm_sample.h5')) - -if not os.path.isfile(storef): - raise IOError('Could not find test input file: {!r}'.format(storef)) +from pympler.asizeof import asizeof -print('Building network from file: {!r}'.format(storef)) - -store = pd.HDFStore(storef, "r") +print() +print("Loading data...") +t0 = time.time() +store = pd.HDFStore('examples/data/bayareanetwork.h5', 'r') nodes, edges = store.nodes, store.edges -net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) +print(round(time.time()-t0, 1), ' sec.') + +print() +print("Initializing network...") +t0 = time.time() +net = pandana.Network(nodes.x, nodes.y, edges.from_int, edges.to_int, edges[['weight']]) store.close() +print(round(time.time()-t0, 1), ' sec.') + print() +print("Calculating nodes in 100m range...") +t0 = time.time() +r = net.nodes_in_range([53114882, 53107159], 100.0) +print(round(time.time()-t0, 1), ' sec.') -# Demonstrate "nodes in range" code - the largest connected subgraph here has 477 nodes, -# per the unit tests +# print(net.node_idx.values) +# print(net.node_idx.index.values) -net.set(pd.Series(net.node_ids)) -#s = net.aggregate(10000, type='count') -#connected_nodes = s[s==477] +print(asizeof(r)) # 88.8 million bytes raw -print(net.nodes_in_range([53114882, 53107159], 5.0)) +print() -print(net.node_idx.values) -print(net.node_idx.index.values) +# dataframe.info() +# dataframe.memory_usage(deep=True) +# .set_index(['1','2'], inplace=True) \ No newline at end of file From 2603c2ed99a719665ddf824a1758d6a05973aac1 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 21:39:38 +0100 Subject: [PATCH 07/13] add docstrings --- pandana/network.py | 230 ++++++++++++++++++++++++++++----------------- src/cyaccess.pyx | 4 + 2 files changed, 148 insertions(+), 86 deletions(-) diff --git a/pandana/network.py b/pandana/network.py index 0b90ebaf..eb16f257 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -24,7 +24,7 @@ def reserve_num_graphs(num): warnings.warn( "Function reserve_num_graphs() is no longer needed in Pandana 0.4+\ and will be removed in a future version", - DeprecationWarning + DeprecationWarning, ) return None @@ -65,11 +65,9 @@ class Network: """ - def __init__(self, node_x, node_y, edge_from, edge_to, edge_weights, - twoway=True): - nodes_df = pd.DataFrame({'x': node_x, 'y': node_y}) - edges_df = pd.DataFrame({'from': edge_from, 'to': edge_to}).\ - join(edge_weights) + def __init__(self, node_x, node_y, edge_from, edge_to, edge_weights, twoway=True): + nodes_df = pd.DataFrame({"x": node_x, "y": node_y}) + edges_df = pd.DataFrame({"from": edge_from, "to": edge_to}).join(edge_weights) self.nodes_df = nodes_df self.edges_df = edges_df @@ -84,19 +82,22 @@ def __init__(self, node_x, node_y, edge_from, edge_to, edge_weights, # in the c extension are actually indexes ordered from 0 to numnodes-1 # node IDs are thus translated back and forth in the python layer, # which allows non-integer node IDs as well - self.node_idx = pd.Series(np.arange(len(nodes_df), dtype="int"), - index=nodes_df.index) + self.node_idx = pd.Series( + np.arange(len(nodes_df), dtype="int"), index=nodes_df.index + ) - edges = pd.concat([self._node_indexes(edges_df["from"]), - self._node_indexes(edges_df["to"])], axis=1) + edges = pd.concat( + [self._node_indexes(edges_df["from"]), self._node_indexes(edges_df["to"])], + axis=1, + ) - self.net = cyaccess(self.node_idx.values, - nodes_df.astype('double').values, - edges.values, - edges_df[edge_weights.columns].transpose() - .astype('double') - .values, - twoway) + self.net = cyaccess( + self.node_idx.values, + nodes_df.astype("double").values, + edges.values, + edges_df[edge_weights.columns].transpose().astype("double").values, + twoway, + ) self._twoway = twoway @@ -137,11 +138,13 @@ def save_hdf5(self, filename, rm_nodes=None): def _node_indexes(self, node_ids): # for some reason, merge is must faster than .loc - df = pd.merge(pd.DataFrame({"node_ids": node_ids}), - pd.DataFrame({"node_idx": self.node_idx}), - left_on="node_ids", - right_index=True, - how="left") + df = pd.merge( + pd.DataFrame({"node_ids": node_ids}), + pd.DataFrame({"node_idx": self.node_idx}), + left_on="node_ids", + right_index=True, + how="left", + ) return df.node_idx @property @@ -164,8 +167,12 @@ def bbox(self): """ The bounding box for nodes in this network [xmin, ymin, xmax, ymax] """ - return [self.nodes_df.x.min(), self.nodes_df.y.min(), - self.nodes_df.x.max(), self.nodes_df.y.max()] + return [ + self.nodes_df.x.min(), + self.nodes_df.y.min(), + self.nodes_df.x.max(), + self.nodes_df.y.max(), + ] def shortest_path(self, node_a, node_b, imp_name=None): """ @@ -224,8 +231,11 @@ def shortest_paths(self, nodes_a, nodes_b, imp_name=None): """ if len(nodes_a) != len(nodes_b): - raise ValueError("Origin and destination counts don't match: {}, {}" - .format(len(nodes_a), len(nodes_b))) + raise ValueError( + "Origin and destination counts don't match: {}, {}".format( + len(nodes_a), len(nodes_b) + ) + ) # map to internal node indexes nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values @@ -298,8 +308,11 @@ def shortest_path_lengths(self, nodes_a, nodes_b, imp_name=None): """ if len(nodes_a) != len(nodes_b): - raise ValueError("Origin and destination counts don't match: {}, {}" - .format(len(nodes_a), len(nodes_b))) + raise ValueError( + "Origin and destination counts don't match: {}, {}".format( + len(nodes_a), len(nodes_b) + ) + ) # map to internal node indexes nodes_a_idx = self._node_indexes(pd.Series(nodes_a)).values @@ -350,22 +363,23 @@ def set(self, node_ids, variable=None, name="tmp"): if variable is None: variable = pd.Series(np.ones(len(node_ids)), index=node_ids.index) - df = pd.DataFrame({name: variable, - "node_idx": self._node_indexes(node_ids)}) + df = pd.DataFrame({name: variable, "node_idx": self._node_indexes(node_ids)}) length = len(df) df = df.dropna(how="any") newl = len(df) - if length-newl > 0: + if length - newl > 0: print( - "Removed %d rows because they contain missing values" % - (length-newl)) + "Removed %d rows because they contain missing values" % (length - newl) + ) self.variable_names.add(name) - self.net.initialize_access_var(name.encode('utf-8'), - df.node_idx.values.astype('int'), - df[name].values.astype('double')) + self.net.initialize_access_var( + name.encode("utf-8"), + df.node_idx.values.astype("int"), + df[name].values.astype("double"), + ) def precompute(self, distance): """ @@ -388,6 +402,31 @@ def precompute(self, distance): def nodes_in_range(self, nodes, radius, imp_name=None): """ + Computes the range queries (the reachable nodes within this maximum + distance) for each input node. + + Parameters + ---------- + nodes : list-like of ints + Source node IDs + radius : float + Maximum distance to use. This will usually be a distance unit in + meters however if you have customized the impedance (using the + imp_name option) this could be in other units such as utility or + time etc. + imp_name : string, optional + The impedance name to use for the aggregation on this network. + Must be one of the impedance names passed in the constructor of + this object. If not specified, there must be only one impedance + passed in the constructor, which will be used. + + Returns + ------- + d : pandas.DataFrame + Like nearest_pois, this is a dataframe containing the input node + index, the index of the nearby nodes within the search radius, + and the distance (according to the requested impedance) from the + source to the nearby node. """ imp_num = self._imp_name_to_num(imp_name) ext_ids = self.node_idx.index.values @@ -396,17 +435,20 @@ def nodes_in_range(self, nodes, radius, imp_name=None): def _imp_name_to_num(self, imp_name): if imp_name is None: - assert len(self.impedance_names) == 1,\ - "must pass impedance name if there are multiple impedances set" + assert ( + len(self.impedance_names) == 1 + ), "must pass impedance name if there are multiple impedances set" imp_name = self.impedance_names[0] - assert imp_name in self.impedance_names, "An impedance with that name" \ - "was not found" + assert imp_name in self.impedance_names, ( + "An impedance with that name" "was not found" + ) return self.impedance_names.index(imp_name) - def aggregate(self, distance, type="sum", decay="linear", imp_name=None, - name="tmp"): + def aggregate( + self, distance, type="sum", decay="linear", imp_name=None, name="tmp" + ): """ Aggregate information for every source node in the network - this is really the main purpose of this library. This allows you to touch @@ -465,23 +507,26 @@ def aggregate(self, distance, type="sum", decay="linear", imp_name=None, type = type.lower() # Resolve aliases - if type in ['ave', 'avg', 'average']: - type = 'mean' + if type in ["ave", "avg", "average"]: + type = "mean" - if type in ['stddev']: - type = 'std' + if type in ["stddev"]: + type = "std" - if type in ['med']: - type = 'median' + if type in ["med"]: + type = "median" - assert name in self.variable_names, "A variable with that name " \ - "has not yet been initialized" + assert name in self.variable_names, ( + "A variable with that name " "has not yet been initialized" + ) - res = self.net.get_all_aggregate_accessibility_variables(distance, - name.encode('utf-8'), - type.encode('utf-8'), - decay.encode('utf-8'), - imp_num) + res = self.net.get_all_aggregate_accessibility_variables( + distance, + name.encode("utf-8"), + type.encode("utf-8"), + decay.encode("utf-8"), + imp_num, + ) return pd.Series(res, index=self.node_ids) @@ -516,7 +561,7 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): If the mapping is imperfect, this function returns all the input x, y's that were successfully mapped to node_ids. """ - xys = pd.DataFrame({'x': x_col, 'y': y_col}) + xys = pd.DataFrame({"x": x_col, "y": y_col}) distances, indexes = self.kdtree.query(xys.values) indexes = np.transpose(indexes)[0] @@ -524,16 +569,22 @@ def get_node_ids(self, x_col, y_col, mapping_distance=None): node_ids = self.nodes_df.iloc[indexes].index - df = pd.DataFrame({"node_id": node_ids, "distance": distances}, - index=xys.index) + df = pd.DataFrame({"node_id": node_ids, "distance": distances}, index=xys.index) if mapping_distance is not None: df = df[df.distance <= mapping_distance] return df.node_id - def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, - plot_kwargs=None, cbar_kwargs=None): + def plot( + self, + data, + bbox=None, + plot_type="scatter", + fig_kwargs=None, + plot_kwargs=None, + cbar_kwargs=None, + ): """ Plot an array of data on a map using Matplotlib, automatically matching the data to the Pandana network node positions. Keyword arguments are @@ -578,8 +629,8 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, except (ModuleNotFoundError, RuntimeError): raise ModuleNotFoundError("Pandana's network.plot() requires Matplotlib") - fig_kwargs = fig_kwargs or {'figsize': (10, 8)} - plot_kwargs = plot_kwargs or {'cmap': 'hot_r', 's': 1} + fig_kwargs = fig_kwargs or {"figsize": (10, 8)} + plot_kwargs = plot_kwargs or {"cmap": "hot_r", "s": 1} cbar_kwargs = cbar_kwargs or {} if not bbox: @@ -587,18 +638,17 @@ def plot(self, data, bbox=None, plot_type='scatter', fig_kwargs=None, self.nodes_df.y.min(), self.nodes_df.x.min(), self.nodes_df.y.max(), - self.nodes_df.x.max()) + self.nodes_df.x.max(), + ) fig, ax = plt.subplots(**fig_kwargs) x, y = (self.nodes_df.x.values, self.nodes_df.y.values) - if plot_type == 'scatter': - plot = plt.scatter( - x, y, c=data.values, **plot_kwargs) - elif plot_type == 'hexbin': - plot = plt.hexbin( - x, y, C=data.values, **plot_kwargs) + if plot_type == "scatter": + plot = plt.scatter(x, y, c=data.values, **plot_kwargs) + elif plot_type == "hexbin": + plot = plt.hexbin(x, y, C=data.values, **plot_kwargs) colorbar = plt.colorbar(plot, **cbar_kwargs) @@ -631,11 +681,13 @@ def init_pois(self, num_categories, max_dist, max_pois): warnings.warn( "Method init_pois() is no longer needed in Pandana 0.4+ and will be removed in a \ future version; maxdist and maxitems should now be passed to set_pois()", - DeprecationWarning + DeprecationWarning, ) return None - def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None): + def set_pois( + self, category=None, maxdist=None, maxitems=None, x_col=None, y_col=None + ): """ Set the location of all the points of interest (POIs) of this category. The POIs are connected to the closest node in the Pandana network @@ -664,7 +716,7 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col """ # condition to check if missing arguments for keyword arguments using set_pois() from v0.3 if maxitems is None: - print('Reading parameters from init_pois()') + print("Reading parameters from init_pois()") maxitems = self.max_pois # condition to check for positional arguments in set_pois() from v0.3 @@ -673,7 +725,7 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col maxitems = self.max_pois if maxdist is None: - print('Reading parameters from init_pois()') + print("Reading parameters from init_pois()") maxdist = self.max_dist elif isinstance(maxdist, type(pd.Series())): @@ -691,10 +743,19 @@ def set_pois(self, category=None, maxdist=None, maxitems=None, x_col=None, y_col node_idx = self._node_indexes(node_ids) - self.net.initialize_category(maxdist, maxitems, category.encode('utf-8'), node_idx.values) + self.net.initialize_category( + maxdist, maxitems, category.encode("utf-8"), node_idx.values + ) - def nearest_pois(self, distance, category, num_pois=1, max_distance=None, - imp_name=None, include_poi_ids=False): + def nearest_pois( + self, + distance, + category, + num_pois=1, + max_distance=None, + imp_name=None, + include_poi_ids=False, + ): """ Find the distance to the nearest points of interest (POI)s from each source node. The bigger values in this case mean less accessibility. @@ -753,18 +814,16 @@ def nearest_pois(self, distance, category, num_pois=1, max_distance=None, imp_num = self._imp_name_to_num(imp_name) dists, poi_ids = self.net.find_all_nearest_pois( - distance, - num_pois, - category.encode('utf-8'), - imp_num) + distance, num_pois, category.encode("utf-8"), imp_num + ) dists[dists == -1] = max_distance df = pd.DataFrame(dists, index=self.node_ids) - df.columns = list(range(1, num_pois+1)) + df.columns = list(range(1, num_pois + 1)) if include_poi_ids: df2 = pd.DataFrame(poi_ids, index=self.node_ids) - df2.columns = ["poi%d" % i for i in range(1, num_pois+1)] + df2.columns = ["poi%d" % i for i in range(1, num_pois + 1)] for col in df2.columns: # if this is still all working according to plan at this point # the great magic trick is now to turn the integer position of @@ -773,7 +832,7 @@ def nearest_pois(self, distance, category, num_pois=1, max_distance=None, # initialized as a pandas.Series - this really is pandas-like # thinking. it's complicated on the inside, but quite # intuitive to the user I think - s = df2[col].astype('int') + s = df2[col].astype("int") df2[col] = self.poi_category_indexes[category].values[s] df2.loc[s == -1, col] = np.nan @@ -810,10 +869,9 @@ def low_connectivity_nodes(self, impedance, count, imp_name=None): """ # set a counter variable on all nodes - self.set(self.node_ids.to_series(), name='counter') + self.set(self.node_ids.to_series(), name="counter") # count nodes within impedance range - agg = self.aggregate( - impedance, type='count', imp_name=imp_name, name='counter') + agg = self.aggregate(impedance, type="count", imp_name=imp_name, name="counter") return np.array(agg[agg < count].index) diff --git a/src/cyaccess.pyx b/src/cyaccess.pyx index 1b7ede19..af637647 100644 --- a/src/cyaccess.pyx +++ b/src/cyaccess.pyx @@ -201,5 +201,9 @@ cdef class cyaccess: def nodes_in_range(self, vector[long] srcnodes, float radius, int impno, np.ndarray[long] ext_ids): """ + srcnodes - node ids of origins + radius - maximum range in which to search for nearby nodes + impno - the impedance id to use + ext_ids - all node ids in the network """ return self.access.Range(srcnodes, radius, impno, ext_ids) From d39f7eee0db79d789779fc83c2974e5414a47738 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 21:46:35 +0100 Subject: [PATCH 08/13] remove 2.7 and add 3.9 --- setup.py | 161 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 89 insertions(+), 72 deletions(-) diff --git a/setup.py b/setup.py index 866669f4..7d0b4104 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,9 @@ ## Invoking tests ############################################### + class PyTest(TestCommand): - user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] + user_options = [("pytest-args=", "a", "Arguments to pass to py.test")] def initialize_options(self): TestCommand.initialize_options(self) @@ -28,21 +29,26 @@ def finalize_options(self): def run_tests(self): # import here, cause outside the eggs aren't loaded import pytest - errno = pytest.main(self.pytest_args or ['']) + + errno = pytest.main(self.pytest_args or [""]) sys.exit(errno) class Lint(TestCommand): def run(self): - os.system("cpplint --filter=-build/include_subdir,-legal/copyright,-runtime/references,-runtime/int src/accessibility.* src/graphalg.*") + os.system( + "cpplint --filter=-build/include_subdir,-legal/copyright,-runtime/references,-runtime/int src/accessibility.* src/graphalg.*" + ) os.system("pycodestyle src/cyaccess.pyx") os.system("pycodestyle pandana") class CustomBuildExtCommand(build_ext): """build_ext command for use when numpy headers are needed.""" + def run(self): import numpy as np + self.include_dirs.append(np.get_include()) build_ext.run(self) @@ -51,120 +57,131 @@ def run(self): ## Building the C++ extension ############################################### -extra_compile_args = ['-w', '-std=c++11', '-O3'] +extra_compile_args = ["-w", "-std=c++11", "-O3"] extra_link_args = [] # Mac compilation: flags are for the llvm compilers included with recent # versions of Xcode Command Line Tools, or newer versions installed separately -if sys.platform.startswith('darwin'): # Mac - - extra_compile_args += ['-stdlib=libc++'] - extra_link_args += ['-stdlib=libc++'] - +if sys.platform.startswith("darwin"): # Mac + + extra_compile_args += ["-stdlib=libc++"] + extra_link_args += ["-stdlib=libc++"] + # The default compiler that ships with Macs doesn't support OpenMP multi- # threading. We recommend using the Conda toolchain instead, but will also # try to detect if people are using another alternative like Homebrew. - if 'CC' in os.environ: - extra_compile_args += ['-fopenmp'] - print('Attempting Pandana compilation with OpenMP multi-threading ' - 'support, with user-specified compiler:\n{}'.format( - os.environ['CC'])) + if "CC" in os.environ: + extra_compile_args += ["-fopenmp"] + print( + "Attempting Pandana compilation with OpenMP multi-threading " + "support, with user-specified compiler:\n{}".format(os.environ["CC"]) + ) # Otherwise, if the default clang has been replaced but nothing specified # in the 'CC' environment variable, assume they've followed our instructions # for using the Conda toolchain. - - elif os.popen('which clang').read().strip() != '/usr/bin/clang': - cc = 'clang' - cc_catalina = 'clang --sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk' - - extra_compile_args += ['-fopenmp'] - print('Attempting Pandana compilation with OpenMP multi-threading ' - 'support, with the following compiler:\n{}'.format( - os.popen('which clang').read())) - - if '10.15' in os.popen('sw_vers').read(): - os.environ['CC'] = cc_catalina - elif '11.' in os.popen('sw_vers').read(): - os.environ['CC'] = cc_catalina + + elif os.popen("which clang").read().strip() != "/usr/bin/clang": + cc = "clang" + cc_catalina = ( + "clang --sysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk" + ) + + extra_compile_args += ["-fopenmp"] + print( + "Attempting Pandana compilation with OpenMP multi-threading " + "support, with the following compiler:\n{}".format( + os.popen("which clang").read() + ) + ) + + if "10.15" in os.popen("sw_vers").read(): + os.environ["CC"] = cc_catalina + elif "11." in os.popen("sw_vers").read(): + os.environ["CC"] = cc_catalina else: - os.environ['CC'] = cc + os.environ["CC"] = cc else: - print('Attempting Pandana compilation without support for ' - 'multi-threading. See installation instructions for alternative ' - 'options') + print( + "Attempting Pandana compilation without support for " + "multi-threading. See installation instructions for alternative " + "options" + ) # Window compilation: flags are for Visual C++ -elif sys.platform.startswith('win'): # Windows - extra_compile_args = ['/w', '/openmp'] +elif sys.platform.startswith("win"): # Windows + extra_compile_args = ["/w", "/openmp"] # Linux compilation: flags are for gcc 4.8 and later else: # Linux - extra_compile_args += ['-fopenmp'] - extra_link_args += ['-lgomp'] + extra_compile_args += ["-fopenmp"] + extra_link_args += ["-lgomp"] cyaccess = Extension( - name='pandana.cyaccess', - sources=[ - 'src/accessibility.cpp', - 'src/graphalg.cpp', - 'src/cyaccess.pyx', - 'src/contraction_hierarchies/src/libch.cpp'], - language='c++', - include_dirs=['.'], - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args) + name="pandana.cyaccess", + sources=[ + "src/accessibility.cpp", + "src/graphalg.cpp", + "src/cyaccess.pyx", + "src/contraction_hierarchies/src/libch.cpp", + ], + language="c++", + include_dirs=["."], + extra_compile_args=extra_compile_args, + extra_link_args=extra_link_args, +) ############################################### ## Standard setup ############################################### -version = '0.7.dev0' +version = "0.7.dev0" packages = find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]) setup( packages=packages, - name='pandana', - author='UrbanSim Inc.', + name="pandana", + author="UrbanSim Inc.", version=version, - license='AGPL', - description=('Python library for network analysis'), + license="AGPL", + description=("Python library for network analysis"), long_description=( - 'Pandana is a Python library for network analysis that uses ' - 'contraction hierarchies to calculate super-fast travel ' - 'accessibility metrics and shortest paths. The numerical ' - 'code is in C++.'), - url='https://udst.github.io/pandana/', + "Pandana is a Python library for network analysis that uses " + "contraction hierarchies to calculate super-fast travel " + "accessibility metrics and shortest paths. The numerical " + "code is in C++." + ), + url="https://udst.github.io/pandana/", ext_modules=[cyaccess], install_requires=[ - 'cython >=0.25.2', - 'numpy >=1.8', - 'pandas >=0.17', - 'requests >=2.0', - 'scikit-learn >=0.18', + "cython >=0.25.2", + "numpy >=1.8", + "pandas >=0.17", + "requests >=2.0", + "scikit-learn >=0.18", 'tables >=3.1, <3.6; python_version <"3.6"', - 'tables >=3.1, <3.7; python_version >="3.6"' + 'tables >=3.1, <3.7; python_version >="3.6"', ], cmdclass={ - 'test': PyTest, - 'lint': Lint, - 'build_ext': CustomBuildExtCommand, + "test": PyTest, + "lint": Lint, + "build_ext": CustomBuildExtCommand, }, classifiers=[ - 'Development Status :: 4 - Beta', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'License :: OSI Approved :: GNU Affero General Public License v3' + "Development Status :: 4 - Beta", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "License :: OSI Approved :: GNU Affero General Public License v3", ], ) From 47b99fa141d104faccbbb015ccce33a4c7719f44 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 21:54:25 +0100 Subject: [PATCH 09/13] cast output to indexed dataframe --- pandana/network.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandana/network.py b/pandana/network.py index eb16f257..77287e34 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -362,7 +362,6 @@ def set(self, node_ids, variable=None, name="tmp"): """ if variable is None: variable = pd.Series(np.ones(len(node_ids)), index=node_ids.index) - df = pd.DataFrame({name: variable, "node_idx": self._node_indexes(node_ids)}) length = len(df) @@ -429,9 +428,16 @@ def nodes_in_range(self, nodes, radius, imp_name=None): source to the nearby node. """ imp_num = self._imp_name_to_num(imp_name) + imp_name = self.impedance_names[imp_num] ext_ids = self.node_idx.index.values - return self.net.nodes_in_range(nodes, radius, imp_num, ext_ids) + raw_result = self.net.nodes_in_range(nodes, radius, imp_num, ext_ids) + return pd.concat( + [ + pd.DataFrame(r, columns=["destination", imp_name]).assign(source=ix) + for r, ix in zip(raw_result, nodes) + ] + )[["source", "destination", imp_name]] def _imp_name_to_num(self, imp_name): if imp_name is None: From fc372028e9b0821a6f7a2e8846e535de2060f143 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 22:07:55 +0100 Subject: [PATCH 10/13] drop duplicated node pairs --- pandana/network.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandana/network.py b/pandana/network.py index 77287e34..852c13cf 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -432,12 +432,13 @@ def nodes_in_range(self, nodes, radius, imp_name=None): ext_ids = self.node_idx.index.values raw_result = self.net.nodes_in_range(nodes, radius, imp_num, ext_ids) - return pd.concat( + clean_result = pd.concat( [ pd.DataFrame(r, columns=["destination", imp_name]).assign(source=ix) for r, ix in zip(raw_result, nodes) ] )[["source", "destination", imp_name]] + return clean_result.drop_duplicates(subset=["source", "destination"]) def _imp_name_to_num(self, imp_name): if imp_name is None: From 63f01ff92bc7e7a41073f3aa957a0800189aa525 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 23:14:18 +0100 Subject: [PATCH 11/13] censor results (cached or otherwise) greater than search radius --- src/accessibility.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/accessibility.cpp b/src/accessibility.cpp index 5aa86b5a..bc54023e 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -115,9 +115,14 @@ Accessibility::Range(vector srcnodes, float radius, int graphno, vector>> output(dists.size()); for (int i = 0; i < dists.size(); i++) { output[i].resize(dists[i].size()); + //int acc = 0; for (int j = 0; j < dists[i].size(); j++) { - output[i][j] = std::make_pair(ext_ids[dists[i][j].first], - dists[i][j].second); + if (dists[i][j].second <= radius){ + output[i][j] = std::make_pair(ext_ids[dists[i][j].first], + dists[i][j].second); + // acc += 1; + } + //output[i].resize(acc+1); } } return output; From 7aee012fb7df36053bb6a3605e27c8049bdda8a9 Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 23:33:23 +0100 Subject: [PATCH 12/13] move radius filter logic to pandas postprocessing --- pandana/network.py | 6 +++++- src/accessibility.cpp | 9 ++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandana/network.py b/pandana/network.py index 852c13cf..ccdf7c16 100644 --- a/pandana/network.py +++ b/pandana/network.py @@ -438,7 +438,11 @@ def nodes_in_range(self, nodes, radius, imp_name=None): for r, ix in zip(raw_result, nodes) ] )[["source", "destination", imp_name]] - return clean_result.drop_duplicates(subset=["source", "destination"]) + return ( + clean_result.drop_duplicates(subset=["source", "destination"]) + .reset_index(drop=True) + .query("{} <= {}".format(imp_name, radius)) + ) def _imp_name_to_num(self, imp_name): if imp_name is None: diff --git a/src/accessibility.cpp b/src/accessibility.cpp index bc54023e..5aa86b5a 100644 --- a/src/accessibility.cpp +++ b/src/accessibility.cpp @@ -115,14 +115,9 @@ Accessibility::Range(vector srcnodes, float radius, int graphno, vector>> output(dists.size()); for (int i = 0; i < dists.size(); i++) { output[i].resize(dists[i].size()); - //int acc = 0; for (int j = 0; j < dists[i].size(); j++) { - if (dists[i][j].second <= radius){ - output[i][j] = std::make_pair(ext_ids[dists[i][j].first], - dists[i][j].second); - // acc += 1; - } - //output[i].resize(acc+1); + output[i][j] = std::make_pair(ext_ids[dists[i][j].first], + dists[i][j].second); } } return output; From 5760c15e6fef4c7bd093ca693f7d66b49814447a Mon Sep 17 00:00:00 2001 From: ljwolf Date: Fri, 9 Jul 2021 23:34:11 +0100 Subject: [PATCH 13/13] add test and blacken --- pandana/tests/test_pandana.py | 138 +++++++++++++++++++++------------- 1 file changed, 87 insertions(+), 51 deletions(-) diff --git a/pandana/tests/test_pandana.py b/pandana/tests/test_pandana.py index 1d64eb09..4e1593e1 100644 --- a/pandana/tests/test_pandana.py +++ b/pandana/tests/test_pandana.py @@ -12,17 +12,16 @@ @pytest.fixture(scope="module") def sample_osm(request): - store = pd.HDFStore( - os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r") + store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r") nodes, edges = store.nodes, store.edges - net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) + net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]]) net.precompute(2000) def fin(): store.close() + request.addfinalizer(fin) return net @@ -31,16 +30,15 @@ def fin(): # initialize a second network @pytest.fixture(scope="module") def second_sample_osm(request): - store = pd.HDFStore( - os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r") + store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r") nodes, edges = store.nodes, store.edges - net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) + net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]]) net.precompute(2000) def fin(): store.close() + request.addfinalizer(fin) return net @@ -91,48 +89,47 @@ def test_agg_variables_accuracy(sample_osm): assert s.iloc[0] == 50 s = net.aggregate(100000, type="AVE").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.mean(), atol=1e-3) s = net.aggregate(100000, type="mean").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.mean(), atol=1e-3) s = net.aggregate(100000, type="min").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.min(), atol=1e-3) s = net.aggregate(100000, type="max").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.max(), atol=1e-3) r.sort_values(inplace=True) s = net.aggregate(100000, type="median").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.iloc[25], atol=1e-2) s = net.aggregate(100000, type="25pct").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.iloc[12], atol=1e-2) s = net.aggregate(100000, type="75pct").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.iloc[37], atol=1e-2) s = net.aggregate(100000, type="SUM").loc[connected_nodes] - assert s.describe()['std'] < .05 # assert almost equal + assert s.describe()["std"] < 0.05 # assert almost equal assert_allclose(s.mean(), r.sum(), atol=1e-2) s = net.aggregate(100000, type="std").loc[connected_nodes] - assert s.describe()['std'] < .01 # assert almost equal + assert s.describe()["std"] < 0.01 # assert almost equal assert_allclose(s.mean(), r.std(), atol=1e-2) def test_non_integer_nodeids(request): - store = pd.HDFStore( - os.path.join(os.path.dirname(__file__), 'osm_sample.h5'), "r") + store = pd.HDFStore(os.path.join(os.path.dirname(__file__), "osm_sample.h5"), "r") nodes, edges = store.nodes, store.edges # convert to string! @@ -140,11 +137,11 @@ def test_non_integer_nodeids(request): edges["from"] = edges["from"].astype("str") edges["to"] = edges["to"].astype("str") - net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, - edges[["weight"]]) + net = pdna.Network(nodes.x, nodes.y, edges["from"], edges.to, edges[["weight"]]) def fin(): store.close() + request.addfinalizer(fin) # test accuracy compared to Pandas functions @@ -162,16 +159,15 @@ def test_agg_variables(sample_osm): net = sample_osm ssize = 50 - net.set(random_node_ids(sample_osm, ssize), - variable=random_data(ssize)) + net.set(random_node_ids(sample_osm, ssize), variable=random_data(ssize)) for type in net.aggregations: for decay in net.decays: for distance in [5, 10, 20]: - t = type.decode(encoding='UTF-8') - d = decay.decode(encoding='UTF-8') + t = type.decode(encoding="UTF-8") + d = decay.decode(encoding="UTF-8") s = net.aggregate(distance, type=t, decay=d) - assert s.describe()['std'] > 0 + assert s.describe()["std"] > 0 # testing w/o setting variable ssize = 50 @@ -180,30 +176,32 @@ def test_agg_variables(sample_osm): for type in net.aggregations: for decay in net.decays: for distance in [5, 10, 20]: - t = type.decode(encoding='UTF-8') - d = decay.decode(encoding='UTF-8') + t = type.decode(encoding="UTF-8") + d = decay.decode(encoding="UTF-8") s = net.aggregate(distance, type=t, decay=d) if t != "std": - assert s.describe()['std'] > 0 + assert s.describe()["std"] > 0 else: # no variance in data - assert s.describe()['std'] == 0 + assert s.describe()["std"] == 0 def test_non_float_node_values(sample_osm): net = sample_osm ssize = 50 - net.set(random_node_ids(sample_osm, ssize), - variable=(random_data(ssize)*100).astype('int')) + net.set( + random_node_ids(sample_osm, ssize), + variable=(random_data(ssize) * 100).astype("int"), + ) for type in net.aggregations: for decay in net.decays: for distance in [5, 10, 20]: - t = type.decode(encoding='UTF-8') - d = decay.decode(encoding='UTF-8') + t = type.decode(encoding="UTF-8") + d = decay.decode(encoding="UTF-8") s = net.aggregate(distance, type=t, decay=d) - assert s.describe()['std'] > 0 + assert s.describe()["std"] > 0 def test_missing_nodeid(sample_osm): @@ -238,13 +236,12 @@ def test_named_variable(sample_osm): net = sample_osm ssize = 50 - net.set(random_node_ids(sample_osm, ssize), - variable=random_data(ssize), name="foo") + net.set(random_node_ids(sample_osm, ssize), variable=random_data(ssize), name="foo") net.aggregate(500, type="sum", decay="linear", name="foo") -''' +""" def test_plot(sample_osm): net = sample_osm @@ -255,7 +252,7 @@ def test_plot(sample_osm): s = net.aggregate(500, type="sum", decay="linear") sample_osm.plot(s) -''' +""" def test_shortest_path(sample_osm): @@ -274,8 +271,8 @@ def test_shortest_paths(sample_osm): vec_paths = sample_osm.shortest_paths(nodes[0:50], nodes[50:100]) for i in range(50): - path = sample_osm.shortest_path(nodes[i], nodes[i+50]) - assert(np.array_equal(vec_paths[i], path)) + path = sample_osm.shortest_path(nodes[i], nodes[i + 50]) + assert np.array_equal(vec_paths[i], path) # check mismatched OD lists try: @@ -331,13 +328,12 @@ def test_pois(sample_osm): net = sample_osm x, y = random_x_y(sample_osm, 100) - x.index = ['lab%d' % i for i in range(len(x))] + x.index = ["lab%d" % i for i in range(len(x))] y.index = x.index net.set_pois("restaurants", 2000, 10, x, y) - d = net.nearest_pois(2000, "restaurants", num_pois=10, - include_poi_ids=True) + d = net.nearest_pois(2000, "restaurants", num_pois=10, include_poi_ids=True) def test_pois2(second_sample_osm): @@ -384,6 +380,7 @@ def test_pois_pandana3_pos_args(second_sample_osm): net2.nearest_pois(2000, "restaurants", num_pois=10) + # test items are sorted @@ -407,12 +404,14 @@ def test_repeat_pois(sample_osm): net = sample_osm def get_nearest_nodes(x, y, x2=None, y2=None, n=2): - coords_dict = [{'x': x, 'y': y, 'var': 1} for i in range(2)] + coords_dict = [{"x": x, "y": y, "var": 1} for i in range(2)] if x2 and y2: - coords_dict.append({'x': x2, 'y': y2, 'var': 1}) + coords_dict.append({"x": x2, "y": y2, "var": 1}) df = pd.DataFrame(coords_dict) - sample_osm.set_pois("restaurants", 2000, 10, df['x'], df['y']) - res = sample_osm.nearest_pois(2000, "restaurants", num_pois=5, include_poi_ids=True) + sample_osm.set_pois("restaurants", 2000, 10, df["x"], df["y"]) + res = sample_osm.nearest_pois( + 2000, "restaurants", num_pois=5, include_poi_ids=True + ) return res # these are the min-max values of the network @@ -426,8 +425,45 @@ def get_nearest_nodes(x, y, x2=None, y2=None, n=2): assert test1.equals(test3) test4 = get_nearest_nodes(-122.31, 47.60, -122.32, 47.61, n=3) - assert_allclose(test4.loc[53114882], [7, 13, 13, 2000, 2000, 2, 0, 1, np.nan, np.nan]) - assert_allclose(test4.loc[53114880], [6, 14, 14, 2000, 2000, 2, 0, 1, np.nan, np.nan]) + assert_allclose( + test4.loc[53114882], [7, 13, 13, 2000, 2000, 2, 0, 1, np.nan, np.nan] + ) + assert_allclose( + test4.loc[53114880], [6, 14, 14, 2000, 2000, 2, 0, 1, np.nan, np.nan] + ) assert_allclose( test4.loc[53227769], - [2000, 2000, 2000, 2000, 2000, np.nan, np.nan, np.nan, np.nan, np.nan]) + [2000, 2000, 2000, 2000, 2000, np.nan, np.nan, np.nan, np.nan, np.nan], + ) + + +def test_nodes_in_range(sample_osm): + net = sample_osm + + np.random.seed(0) + ssize = 10 + x, y = random_x_y(net, 10) + snaps = net.get_node_ids(x, y) + + test1 = net.nodes_in_range(snaps, 1) + net.precompute(10) + test5 = net.nodes_in_range(snaps, 5) + test11 = net.nodes_in_range(snaps, 11) + assert test1.weight.max() == 1 + assert test5.weight.max() == 5 + assert test11.weight.max() == 11 + + focus_id = snaps[0] + all_distances = net.shortest_path_lengths( + [focus_id] * len(net.node_ids), net.node_ids + ) + all_distances = np.asarray(all_distances) + assert (all_distances <= 1).sum() == len( + test1.query("source == {}".format(focus_id)) + ) + assert (all_distances <= 5).sum() == len( + test5.query("source == {}".format(focus_id)) + ) + assert (all_distances <= 11).sum() == len( + test11.query("source == {}".format(focus_id)) + )