From cfa32a87f5425578ba886540ad081391c797cdc7 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Wed, 19 Jun 2024 14:53:59 +0000
Subject: [PATCH 01/19] Reorganize sample_data folder to accommodate multiple use cases

---
 .gitignore                                    |   2 +-
 sample_data/ingest_sample_data.py             |  17 +--
 sample_data/ingest_sample_data_output.txt     | 112 ------------------
 .../{ => use_cases/boston_floods}/charts.json |   0
 .../{ => use_cases/boston_floods}/cities.json |   0
 .../boston_floods}/contexts.json              |  22 ----
 .../boston_floods}/datasets.json              |   0
 uvdat/core/management/commands/populate.py    |  13 +-
 uvdat/core/tests/test_populate.py             |   1 +
 9 files changed, 22 insertions(+), 145 deletions(-)
 delete mode 100644 sample_data/ingest_sample_data_output.txt
 rename sample_data/{ => use_cases/boston_floods}/charts.json (100%)
 rename sample_data/{ => use_cases/boston_floods}/cities.json (100%)
 rename sample_data/{ => use_cases/boston_floods}/contexts.json (55%)
 rename sample_data/{ => use_cases/boston_floods}/datasets.json (100%)

diff --git a/.gitignore b/.gitignore
index 0c317854..f086f1ab 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,7 +4,7 @@ minio/*
 rabbitmq/*
 .vscode/
 staticfiles/
-sample_data/*/*
+sample_data/downloads/*
 
 # osmnx data cache folder
 cache
diff --git a/sample_data/ingest_sample_data.py b/sample_data/ingest_sample_data.py
index 064b4170..fa77a8ff 100644
--- a/sample_data/ingest_sample_data.py
+++ b/sample_data/ingest_sample_data.py
@@ -10,13 +10,16 @@
 from uvdat.core.models import Chart, Context, Dataset, FileItem
 
 
+USE_CASE_FOLDER = Path('sample_data/use_cases')
+
+
 def ingest_file(file_info, index=0, dataset=None, chart=None):
     file_path = file_info.get('path')
     file_name = file_info.get('name', file_path.split('/')[-1])
     file_url = file_info.get('url')
     file_metadata = file_info.get('metadata', {})
 
-    file_location = Path('sample_data', file_path)
+    file_location = Path('sample_data/downloads', file_path)
     file_type = file_path.split('.')[-1]
     if not file_location.exists():
         print(f'\t Downloading data file {file_name}.')
@@ -46,9 +49,9 @@
         new_file_item.file.save(file_path, ContentFile(f.read()))
 
 
-def ingest_contexts():
+def ingest_contexts(use_case):
     print('Creating Context objects...')
-    with open('sample_data/contexts.json') as contexts_json:
+    with open(str(USE_CASE_FOLDER / use_case / 'contexts.json')) as contexts_json:
         data = json.load(contexts_json)
         for context in data:
             print('\t- ', context['name'])
@@ -66,9 +69,9 @@
         context_for_setting.datasets.set(Dataset.objects.filter(name__in=context['datasets']))
 
 
-def ingest_charts():
+def ingest_charts(use_case):
     print('Creating Chart objects...')
-    with open('sample_data/charts.json') as charts_json:
+    with open(str(USE_CASE_FOLDER / use_case / 'charts.json')) as charts_json:
         data = json.load(charts_json)
         for chart in data:
             print('\t- ', chart['name'])
@@ -100,9 +103,9 @@
         )
 
 
-def ingest_datasets(include_large=False, dataset_indexes=None):
+def ingest_datasets(use_case, include_large=False, dataset_indexes=None):
     print('Creating Dataset objects...')
-    with open('sample_data/datasets.json') as datasets_json:
+    with open(str(USE_CASE_FOLDER / use_case / 'datasets.json')) as datasets_json:
         data = json.load(datasets_json)
         for index, dataset in enumerate(data):
             if dataset_indexes is None or index in dataset_indexes:
diff --git a/sample_data/ingest_sample_data_output.txt b/sample_data/ingest_sample_data_output.txt
deleted file mode 100644
index e510a6d5..00000000
---
a/sample_data/ingest_sample_data_output.txt +++ /dev/null @@ -1,112 +0,0 @@ -root@70f88cf67dbf:/opt/django-project# python manage.py populate --include_large ------------------------------------- -Populating server with sample data... -Creating Context objects... - - Boston Transportation - Context Boston Transportation created. - - DC Transportation - Context DC Transportation created. - - Boston-Washington Transportation - Context Boston-Washington Transportation created. -Creating Chart objects... - - Boston Harbor Daily Tide Levels - Chart Boston Harbor Daily Tide Levels created. - FileItem tide_level_data.csv created. - Converting data for Boston Harbor Daily Tide Levels... - Saved converted data for chart Boston Harbor Daily Tide Levels. -Creating Dataset objects... - - MBTA Rapid Transit - Dataset MBTA Rapid Transit created. - FileItem mbta_rapid_transit.zip created. - Converting data for MBTA Rapid Transit... - VectorMapLayer 1 created. - 31 vector tiles created. - 158 nodes and 164 edges created. - - MBTA Commuter Rail - Dataset MBTA Commuter Rail created. - FileItem commuter_rail.zip created. - Converting data for MBTA Commuter Rail... - VectorMapLayer 2 created. - 686 vector tiles created. - 268 nodes and 269 edges created. - - Boston Hurricane Surge Inundation Zones - Dataset Boston Hurricane Surge Inundation Zones created. - FileItem hurr_inun.zip created. - Converting data for Boston Hurricane Surge Inundation Zones... - VectorMapLayer 3 created. - 295 vector tiles created. - - Boston FEMA National Flood Hazard - Dataset Boston FEMA National Flood Hazard created. - FileItem flood_hazard_fema.zip created. - Converting data for Boston FEMA National Flood Hazard... - VectorMapLayer 4 created. - 587 vector tiles created. - - Massachusetts Elevation Data - Dataset Massachusetts Elevation Data created. - FileItem easternmass.tif created. - Converting data for Massachusetts Elevation Data... - RasterMapLayer 1 created. - - Boston Neighborhoods - Dataset Boston Neighborhoods created. - FileItem neighborhoods2020.json created. - Converting data for Boston Neighborhoods... - VectorMapLayer 5 created. - 26 vector tiles created. - 24 regions created. - - Boston Census 2020 Block Groups - Dataset Boston Census 2020 Block Groups created. - FileItem blockgroups.zip created. - Converting data for Boston Census 2020 Block Groups... - VectorMapLayer 6 created. - 26 vector tiles created. - 581 regions created. - - Boston Zip Codes - Dataset Boston Zip Codes created. - FileItem zipcodes.zip created. - Converting data for Boston Zip Codes... - VectorMapLayer 7 created. - 780 vector tiles created. - 539 regions created. - - Boston Sea Level Rises - Dataset Boston Sea Level Rises created. - FileItem 9in_rise.geojson created. - FileItem 21in_rise.geojson created. - FileItem 36in_rise.geojson created. - Converting data for Boston Sea Level Rises... - VectorMapLayer 8 created. - 26 vector tiles created. - VectorMapLayer 9 created. - 26 vector tiles created. - VectorMapLayer 10 created. - 26 vector tiles created. - - Boston 10-Year Flood Events - Dataset Boston 10-Year Flood Events created. - FileItem 9in_10yr_flood.geojson created. - FileItem 21in_10yr_flood.geojson created. - FileItem 36in_10yr_flood.geojson created. - Converting data for Boston 10-Year Flood Events... - VectorMapLayer 11 created. - 26 vector tiles created. - VectorMapLayer 12 created. - 26 vector tiles created. - VectorMapLayer 13 created. - 26 vector tiles created. 
- - Boston 100-Year Flood Events - Dataset Boston 100-Year Flood Events created. - FileItem 9in_100yr_flood.geojson created. - FileItem 21in_100yr_flood.geojson created. - FileItem 36in_100yr_flood.geojson created. - Converting data for Boston 100-Year Flood Events... - VectorMapLayer 14 created. - 26 vector tiles created. - VectorMapLayer 15 created. - 26 vector tiles created. - VectorMapLayer 16 created. - 26 vector tiles created. - - DC Metro - Dataset DC Metro created. - FileItem DC_Metro.zip created. - Converting data for DC Metro... - VectorMapLayer 17 created. - 56 vector tiles created. - 98 nodes and 134 edges created. diff --git a/sample_data/charts.json b/sample_data/use_cases/boston_floods/charts.json similarity index 100% rename from sample_data/charts.json rename to sample_data/use_cases/boston_floods/charts.json diff --git a/sample_data/cities.json b/sample_data/use_cases/boston_floods/cities.json similarity index 100% rename from sample_data/cities.json rename to sample_data/use_cases/boston_floods/cities.json diff --git a/sample_data/contexts.json b/sample_data/use_cases/boston_floods/contexts.json similarity index 55% rename from sample_data/contexts.json rename to sample_data/use_cases/boston_floods/contexts.json index 0f6428b2..409a7911 100644 --- a/sample_data/contexts.json +++ b/sample_data/use_cases/boston_floods/contexts.json @@ -30,27 +30,5 @@ "datasets": [ "DC Metro" ] - }, - { - "name": "Boston-Washington Transportation", - "default_map_center": [ - 40.5, - -74.5 - ], - "default_map_zoom": 8, - "datasets": [ - "MBTA Rapid Transit", - "MBTA Commuter Rail", - "Massachusetts Elevation Data", - "Boston Hurricane Surge Inundation Zones", - "Bsoton FEMA National Flood Hazard", - "Boston Neighborhoods", - "Boston Census 2020 Block Groups", - "Boston Zip Codes", - "Boston Sea Level Rises", - "Boston 10-Year Flood Events", - "Boston 100-Year Flood Events", - "DC Metro" - ] } ] diff --git a/sample_data/datasets.json b/sample_data/use_cases/boston_floods/datasets.json similarity index 100% rename from sample_data/datasets.json rename to sample_data/use_cases/boston_floods/datasets.json diff --git a/uvdat/core/management/commands/populate.py b/uvdat/core/management/commands/populate.py index 8c8abffa..ff4f07cd 100644 --- a/uvdat/core/management/commands/populate.py +++ b/uvdat/core/management/commands/populate.py @@ -7,6 +7,11 @@ class Command(BaseCommand): requires_migrations_checks = True def add_arguments(self, parser): + parser.add_argument( + 'use_case', + choices=['boston_floods', 'new_york_energy'], + help='Sample data collection to load', + ) parser.add_argument( '--include_large', action='store_true', @@ -15,15 +20,17 @@ def add_arguments(self, parser): parser.add_argument('--dataset_indexes', nargs='*', type=int) def handle(self, *args, **kwargs): - print('Populating server with sample data...') + use_case = kwargs['use_case'] include_large = kwargs['include_large'] dataset_indexes = kwargs['dataset_indexes'] if dataset_indexes is None or len(dataset_indexes) == 0: dataset_indexes = None + print(f'Populating server with sample data for use case {use_case}...') ingest_datasets( + use_case, include_large=include_large, dataset_indexes=dataset_indexes, ) - ingest_contexts() - ingest_charts() + ingest_contexts(use_case) + ingest_charts(use_case) diff --git a/uvdat/core/tests/test_populate.py b/uvdat/core/tests/test_populate.py index f2bcb7c7..55d79bd0 100644 --- a/uvdat/core/tests/test_populate.py +++ b/uvdat/core/tests/test_populate.py @@ -29,6 +29,7 @@ def 
test_populate(): call_command( 'populate', + 'boston_floods', include_large=True, dataset_indexes=dataset_indexes, ) From 48e0a2249c8b4331f56d3b3c35a1561c4dbf023d Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 24 Jun 2024 19:23:04 +0000 Subject: [PATCH 02/19] Add module-based dataset loading --- sample_data/ingest_sample_data.py | 1 + .../use_cases/boston_floods/cities.json | 14 ---- .../use_cases/new_york_energy/charts.json | 1 + .../use_cases/new_york_energy/contexts.json | 19 +++++ .../use_cases/new_york_energy/datasets.json | 72 +++++++++++++++++++ .../use_cases/new_york_energy/nysdp.py | 55 ++++++++++++++ 6 files changed, 148 insertions(+), 14 deletions(-) delete mode 100644 sample_data/use_cases/boston_floods/cities.json create mode 100644 sample_data/use_cases/new_york_energy/charts.json create mode 100644 sample_data/use_cases/new_york_energy/contexts.json create mode 100644 sample_data/use_cases/new_york_energy/datasets.json create mode 100644 sample_data/use_cases/new_york_energy/nysdp.py diff --git a/sample_data/ingest_sample_data.py b/sample_data/ingest_sample_data.py index fa77a8ff..3292b6d4 100644 --- a/sample_data/ingest_sample_data.py +++ b/sample_data/ingest_sample_data.py @@ -1,4 +1,5 @@ from datetime import datetime +import importlib import json import os from pathlib import Path diff --git a/sample_data/use_cases/boston_floods/cities.json b/sample_data/use_cases/boston_floods/cities.json deleted file mode 100644 index b00ea6b4..00000000 --- a/sample_data/use_cases/boston_floods/cities.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "name": "Boston, MA", - "latitude": 42.4, - "longitude": -71.1, - "default_zoom": 11 - }, - { - "name": "Washington, DC", - "latitude": 38.9, - "longitude": -77.1, - "default_zoom": 11 - } -] diff --git a/sample_data/use_cases/new_york_energy/charts.json b/sample_data/use_cases/new_york_energy/charts.json new file mode 100644 index 00000000..fe51488c --- /dev/null +++ b/sample_data/use_cases/new_york_energy/charts.json @@ -0,0 +1 @@ +[] diff --git a/sample_data/use_cases/new_york_energy/contexts.json b/sample_data/use_cases/new_york_energy/contexts.json new file mode 100644 index 00000000..189f92fd --- /dev/null +++ b/sample_data/use_cases/new_york_energy/contexts.json @@ -0,0 +1,19 @@ +[ + { + "name": "NY Energy", + "default_map_center": [ + 43.5, + -75.5 + ], + "default_map_zoom": 8, + "datasets": [ + "National Grid Substations", + "National Grid DistAssetsOverview", + "National Grid Electrification_Data", + "National Grid EV_Load_Serving_Capacity", + "National Grid Hosting_Capacity_Data", + "National Grid LSRV", + "National Grid NY_SubT_SDP" + ] + } +] diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json new file mode 100644 index 00000000..fc4d61d2 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/datasets.json @@ -0,0 +1,72 @@ +[ + { + "name": "National Grid Substations", + "description": "Substation vector data", + "category": "energy", + "module": "nysdp", + "function": "create_vector_features", + "kwargs": { + "service_name": "Substations" + } + }, + { + "name": "National Grid DistAssetsOverview", + "description": "DistAssetsOverview vector data", + "category": "energy", + "module": "nysdp", + "function": "create_vector_features", + "kwargs": { + "service_name": "DistAssetsOverview" + } + }, + { + "name": "National Grid Electrification_Data", + "description": "Electrification_Data vector data", + "category": "energy", + "module": "nysdp", + "function": 
"create_vector_features", + "kwargs": { + "service_name": "Electrification_Data" + } + }, + { + "name": "National Grid EV_Load_Serving_Capacity", + "description": "EV_Load_Serving_Capacity vector data", + "category": "energy", + "module": "nysdp", + "function": "create_vector_features", + "kwargs": { + "service_name": "EV_Load_Serving_Capacity" + } + }, + { + "name": "National Grid Hosting_Capacity_Data", + "description": "Hosting_Capacity_Data vector data", + "category": "energy", + "module": "nysdp", + "function": "create_vector_features", + "kwargs": { + "service_name": "Hosting_Capacity_Data" + } + }, + { + "name": "National Grid LSRV", + "description": "LSRV vector data", + "category": "energy", + "module": "nysdp", + "function": "create_vector_features", + "kwargs": { + "service_name": "LSRV" + } + }, + { + "name": "National Grid NY_SubT_SDP", + "description": "NY_SubT_SDP vector data", + "category": "energy", + "module": "nysdp", + "function": "create_vector_features", + "kwargs": { + "service_name": "NY_SubT_SDP" + } + } +] diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py new file mode 100644 index 00000000..2e56a84a --- /dev/null +++ b/sample_data/use_cases/new_york_energy/nysdp.py @@ -0,0 +1,55 @@ +import json +import geopandas +import requests + +from django.contrib.gis.geos import GEOSGeometry +from uvdat.core.models import VectorMapLayer, VectorFeature + + +NYDSP_URL = 'https://systemdataportal.nationalgrid.com/arcgis/rest/services/NYSDP' +RECORDS_PER_PAGE = 100 +SERVICE_SUFFIX = 'MapServer' +FORMAT_SUFFIX = 'f=pjson' +QUERY_CONTENT = f'where=1%3D1&returnGeometry=true&outFields=*&resultRecordCount={RECORDS_PER_PAGE}&f=geojson' + + +def fetch_vector_features(service_name=None): + feature_sets = {} + if service_name is None: + return feature_sets + service_url = f'{NYDSP_URL}/{service_name}/{SERVICE_SUFFIX}' + service_info = requests.get(f'{service_url}?{FORMAT_SUFFIX}').json() + for layer in service_info.get('layers', []): + feature_set = [] + layer_id = layer.get('id') + if layer_id is not None: + feature_page = None + result_offset = 0 + while feature_page is None or len(feature_page) == RECORDS_PER_PAGE: + query_response = requests.get( + f"{service_url}/{layer_id}/query?resultOffset={result_offset}&{QUERY_CONTENT}" + ).json() + feature_page = query_response.get('features', []) + feature_set += feature_page + result_offset += RECORDS_PER_PAGE + if len(feature_set): + feature_sets[layer_id] = feature_set + return feature_sets + + +def create_vector_features(dataset, service_name=None): + VectorMapLayer.objects.filter(dataset=dataset).delete() + + feature_sets = fetch_vector_features(service_name=service_name) + vector_features = [] + for index, feature_set in feature_sets.items(): + map_layer = VectorMapLayer.objects.create(dataset=dataset, index=index) + for feature in feature_set: + vector_features.append( + VectorFeature( + map_layer=map_layer, + geometry=GEOSGeometry(json.dumps(feature['geometry'])), + properties=feature['properties'], + ) + ) + VectorFeature.objects.bulk_create(vector_features) From d4995314b02ff50b30cb633f4dca8dc09a9af0ed Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 8 Jul 2024 17:38:30 +0000 Subject: [PATCH 03/19] Add a function to consolidate nysdp features --- .../use_cases/new_york_energy/contexts.json | 2 + .../use_cases/new_york_energy/datasets.json | 23 ++++---- .../use_cases/new_york_energy/nysdp.py | 54 ++++++++++++++++++- 3 files changed, 65 insertions(+), 14 deletions(-) diff 
--git a/sample_data/use_cases/new_york_energy/contexts.json b/sample_data/use_cases/new_york_energy/contexts.json index 189f92fd..9837673e 100644 --- a/sample_data/use_cases/new_york_energy/contexts.json +++ b/sample_data/use_cases/new_york_energy/contexts.json @@ -7,6 +7,8 @@ ], "default_map_zoom": 8, "datasets": [ + "National Grid CompanyBoundary", + "National Grid Network", "National Grid Substations", "National Grid DistAssetsOverview", "National Grid Electrification_Data", diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json index fc4d61d2..53df0031 100644 --- a/sample_data/use_cases/new_york_energy/datasets.json +++ b/sample_data/use_cases/new_york_energy/datasets.json @@ -1,22 +1,12 @@ [ { - "name": "National Grid Substations", - "description": "Substation vector data", + "name": "National Grid CompanyBoundary", + "description": "CompanyBoundary vector data", "category": "energy", "module": "nysdp", "function": "create_vector_features", "kwargs": { - "service_name": "Substations" - } - }, - { - "name": "National Grid DistAssetsOverview", - "description": "DistAssetsOverview vector data", - "category": "energy", - "module": "nysdp", - "function": "create_vector_features", - "kwargs": { - "service_name": "DistAssetsOverview" + "service_name": "CompanyBoundary" } }, { @@ -68,5 +58,12 @@ "kwargs": { "service_name": "NY_SubT_SDP" } + }, + { + "name": "National Grid Network", + "description": "Consolidated network from several services' combined data", + "category": "energy", + "module": "nysdp", + "function": "create_consolidated_network" } ] diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py index 2e56a84a..b830081e 100644 --- a/sample_data/use_cases/new_york_energy/nysdp.py +++ b/sample_data/use_cases/new_york_energy/nysdp.py @@ -3,7 +3,7 @@ import requests from django.contrib.gis.geos import GEOSGeometry -from uvdat.core.models import VectorMapLayer, VectorFeature +from uvdat.core.models import Dataset, VectorMapLayer, VectorFeature NYDSP_URL = 'https://systemdataportal.nationalgrid.com/arcgis/rest/services/NYSDP' @@ -53,3 +53,55 @@ def create_vector_features(dataset, service_name=None): ) ) VectorFeature.objects.bulk_create(vector_features) + + +def create_consolidated_network(dataset): + VectorMapLayer.objects.filter(dataset=dataset).delete() + map_layer = VectorMapLayer.objects.create(dataset=dataset, index=0) + include_services = [ + "Substations", + "DistAssetsOverview", + "Electrification_Data", + "EV_Load_Serving_Capacity", + "Hosting_Capacity_Data", + "LSRV", + "NY_SubT_SDP" + ] + + features = [] + for service_name in include_services: + try: + dataset = Dataset.objects.get(name=f'National Grid {service_name}') + print('\t', f'Using saved vector features for {service_name}...') + features += [ + dict( + type='Feature', + geometry=json.loads(feature.geometry.json), + properties=feature.properties + ) + for feature in VectorFeature.objects.filter(map_layer__dataset=dataset) + ] + except Dataset.DoesNotExist: + print('\t', f'Querying {service_name}...') + feature_sets = fetch_vector_features(service_name=service_name) + for layer_id, feature_set in feature_sets.items(): + features += feature_set + print('\t', len(features), 'features found. 
Combining like geometries...') + + # Use geopandas to merge properties of duplicate features + gdf = geopandas.GeoDataFrame.from_features(features) + gdf["geometry"] = gdf.normalize() + + grouped = gdf.groupby(gdf['geometry'].to_wkt()).first() + intersects = grouped.sjoin(grouped, how='left', predicate='intersects') + print('\t', len(grouped), 'consolidated features found. Saving to database...') + + geojson = json.loads(grouped.to_json()) + VectorFeature.objects.bulk_create([ + VectorFeature( + map_layer=map_layer, + geometry=GEOSGeometry(json.dumps(feature['geometry'])), + properties=feature['properties'], + ) + for feature in geojson.get('features') + ]) From 02578334512437121fcec1f27aeff45b00b2041c Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Wed, 10 Jul 2024 23:32:01 +0000 Subject: [PATCH 04/19] wip: interpret network from vector features --- .../use_cases/new_york_energy/nysdp.py | 257 +++++++++++++++--- 1 file changed, 213 insertions(+), 44 deletions(-) diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py index b830081e..8cccaa11 100644 --- a/sample_data/use_cases/new_york_energy/nysdp.py +++ b/sample_data/use_cases/new_york_energy/nysdp.py @@ -1,9 +1,12 @@ import json import geopandas +import numpy +import pandas import requests +import shapely -from django.contrib.gis.geos import GEOSGeometry -from uvdat.core.models import Dataset, VectorMapLayer, VectorFeature +from django.contrib.gis.geos import GEOSGeometry, Point, LineString +from uvdat.core.models import Dataset, Network, NetworkNode, NetworkEdge, VectorMapLayer, VectorFeature NYDSP_URL = 'https://systemdataportal.nationalgrid.com/arcgis/rest/services/NYSDP' @@ -55,53 +58,219 @@ def create_vector_features(dataset, service_name=None): VectorFeature.objects.bulk_create(vector_features) +def vector_features_from_network(network): + VectorMapLayer.objects.filter(dataset=network.dataset).delete() + map_layer = VectorMapLayer.objects.create(dataset=network.dataset) + VectorFeature.objects.bulk_create([ + VectorFeature( + map_layer=map_layer, + geometry=node.location, + properties=node.metadata or {}, + ) + for node in network.nodes.all() + ]) + VectorFeature.objects.bulk_create([ + VectorFeature( + map_layer=map_layer, + geometry=edge.line_geometry, + properties=edge.metadata or {}, + ) + for edge in network.edges.all() + ]) + + def create_consolidated_network(dataset): - VectorMapLayer.objects.filter(dataset=dataset).delete() - map_layer = VectorMapLayer.objects.create(dataset=dataset, index=0) - include_services = [ - "Substations", - "DistAssetsOverview", - "Electrification_Data", - "EV_Load_Serving_Capacity", - "Hosting_Capacity_Data", - "LSRV", - "NY_SubT_SDP" - ] + # VectorMapLayer.objects.filter(dataset=dataset).delete() + # map_layer = VectorMapLayer.objects.create(dataset=dataset, index=0) + # include_services = [ + # "Substations", + # "DistAssetsOverview", + # "Electrification_Data", + # "EV_Load_Serving_Capacity", + # "Hosting_Capacity_Data", + # "LSRV", + # "NY_SubT_SDP" + # ] - features = [] - for service_name in include_services: - try: - dataset = Dataset.objects.get(name=f'National Grid {service_name}') - print('\t', f'Using saved vector features for {service_name}...') - features += [ - dict( - type='Feature', - geometry=json.loads(feature.geometry.json), - properties=feature.properties - ) - for feature in VectorFeature.objects.filter(map_layer__dataset=dataset) - ] - except Dataset.DoesNotExist: - print('\t', f'Querying {service_name}...') - feature_sets 
= fetch_vector_features(service_name=service_name) - for layer_id, feature_set in feature_sets.items(): - features += feature_set - print('\t', len(features), 'features found. Combining like geometries...') + # features = [] + # for service_name in include_services: + # try: + # d = Dataset.objects.get(name=f'National Grid {service_name}') + # print('\t', f'Using saved vector features for {service_name}...') + # features += [ + # dict( + # type='Feature', + # geometry=json.loads(feature.geometry.json), + # properties=feature.properties + # ) + # for feature in VectorFeature.objects.filter(map_layer__dataset=d) + # ] + # except Dataset.DoesNotExist: + # print('\t', f'Querying {service_name}...') + # feature_sets = fetch_vector_features(service_name=service_name) + # for layer_id, feature_set in feature_sets.items(): + # features += feature_set + # print('\t', len(features), 'features found. Combining like geometries...') + # print('features found:', len(features)) # Use geopandas to merge properties of duplicate features + # gdf = geopandas.GeoDataFrame.from_features(features) + # gdf["geometry"] = gdf.normalize() + + # grouped = gdf.groupby(gdf['geometry'].to_wkt()).first() + # print('\t', len(grouped), 'consolidated features found. Saving to database...') + + # TODO: color by properties + + # geojson = json.loads(grouped.to_json()) + # VectorFeature.objects.bulk_create([ + # VectorFeature( + # map_layer=map_layer, + # geometry=GEOSGeometry(json.dumps(feature['geometry'])), + # properties=feature['properties'], + # ) + # for feature in geojson.get('features') + # ], batch_size=10000) + + from django.contrib.gis.geos import Polygon + + + test_dataset, created = Dataset.objects.get_or_create( + name="Test Network", + category="energy", + dataset_type="VECTOR" + ) + Network.objects.filter(dataset=test_dataset).delete() + network = Network.objects.create(dataset=test_dataset) + + test_bbox = dict(xmin=-74, ymin=42.8, xmax=-73.9, ymax=42.9) + test_area = Polygon.from_bbox( + [test_bbox['xmin'], test_bbox['ymin'], test_bbox['xmax'], test_bbox['ymax']] + ) + features = [ + dict( + type='Feature', + geometry=json.loads(feature.geometry.json), + properties=feature.properties + ) + for feature in VectorFeature.objects.filter( + map_layer__dataset=dataset, + geometry__coveredby=test_area, + ) + ] + show_columns = [ + 'geometry', + ] + pandas.set_option('display.max_columns', None) + print('test features:', len(features)) gdf = geopandas.GeoDataFrame.from_features(features) gdf["geometry"] = gdf.normalize() + spatial_index = gdf.sindex - grouped = gdf.groupby(gdf['geometry'].to_wkt()).first() - intersects = grouped.sjoin(grouped, how='left', predicate='intersects') - print('\t', len(grouped), 'consolidated features found. 
Saving to database...') + # construct adjacencies list + adjacencies = {} + for feat_index, feature in gdf.iterrows(): + adjacencies[feat_index] = {} + geom = feature['geometry'] + if geom.geom_type == 'MultiLineString': + # attempt to merge contiguous portions + geom = shapely.ops.linemerge(geom) + geom_list = [geom] + if geom.geom_type == 'MultiLineString': + # if still multi, contains non-contiguous portions + geom_list = geom.geoms - geojson = json.loads(grouped.to_json()) - VectorFeature.objects.bulk_create([ - VectorFeature( - map_layer=map_layer, - geometry=GEOSGeometry(json.dumps(feature['geometry'])), - properties=feature['properties'], - ) - for feature in geojson.get('features') - ]) + for geom_index, geom in enumerate(geom_list): + adjacencies[feat_index][geom_index] = {} + # first and last points (endpoints of current geom) + # assumes no intersections will occur in the middle of a line + for coord_index, coord_name in [(0, 'start'), (-1, 'end')]: + coord = shapely.geometry.Point(geom.coords[coord_index]) + nearest_indexes, distances = spatial_index.nearest(coord, max_distance=10, return_distance=True) + geom_inds, tree_inds = nearest_indexes + # exclude self from nearest results + tree_inds = [int(i) for i in tree_inds if i != feat_index] + node = None + if len(tree_inds) != 1: + node = NetworkNode.objects.create( + name=f'{feat_index}_{geom_index}_{coord_name}', + network=network, + location=Point(*geom.coords[coord_index]) + ) + adjacencies[feat_index][geom_index][coord_name] = dict( + node=node, + adjs=tree_inds, + ) + + visited = [] + def follow_adjacencies(feat_index, current_line=None, current_from_node=None): + if feat_index not in visited: + visited.append(feat_index) + feat = gdf.iloc[feat_index] + for geom_index, geom_record in adjacencies[feat_index].items(): + start_record = geom_record.get('start', {}) + start_node = start_record.get('node') + start_adjs = start_record.get('adjs') + + end_record = geom_record.get('end', {}) + end_node = end_record.get('node') + end_adjs = end_record.get('adjs') + coords = [] + if feat['geometry'].geom_type == 'MultiLineString': + for geom in feat['geometry'].geoms: + coords += geom.coords + else: + coords = feat['geometry'].coords + + if start_node and end_node: + # print('edge', start_node, end_node, 'len', len(coords)) + NetworkEdge.objects.create( + name='edge', + network=network, + from_node=start_node, + to_node=end_node, + metadata={}, # TODO: figure out capturing metadata + line_geometry=LineString(*[ + Point(*p) for p in coords + ]) + ) + + if current_line is None: + current_line = list(coords) + else: + current_line += list(coords) + + follow_start_results = None + follow_end_results = None + if len(start_adjs) > 0: + for adj in start_adjs: + follow_start_results = follow_adjacencies( + adj, current_line=current_line, current_from_node=current_from_node or end_node + ) + if len(end_adjs) > 0: + for adj in end_adjs: + follow_end_results = follow_adjacencies( + adj, current_line=current_line, current_from_node=current_from_node or start_node + ) + + start_line, start_from_node, end_line, end_from_node = None, None, None, None + if follow_start_results: + start_line, start_from_node = follow_start_results + if follow_end_results: + end_line, end_from_node = follow_end_results + if current_from_node is None: + if start_line is not None and end_line is not None: + print(feat_index, 'combine lines', len(start_line), '+', len(end_lines), 'between', start_from_node, end_from_node) + if start_line is not None: + print(feat_index, 
'start line!', start_from_node, end_from_node, current_from_node, start_node, end_node) + if end_line is not None: + print(feat_index, 'end line!', start_from_node, end_from_node, current_from_node, start_node, end_node) + + return current_line, current_from_node + + for feat_index in list(adjacencies.keys())[:5]: + follow_adjacencies(feat_index) + print('total nodes', NetworkNode.objects.filter(network=network).count()) + print('total edges', NetworkEdge.objects.filter(network=network).count()) + + vector_features_from_network(network) From 29650e70397b367d066e23e575441330b31368f5 Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 29 Jul 2024 13:39:10 +0000 Subject: [PATCH 05/19] fix: Improve speed and accuracy of network interpretation algorithm --- .../use_cases/new_york_energy/contexts.json | 7 +- .../use_cases/new_york_energy/datasets.json | 68 ++-- .../new_york_energy/interpret_network.py | 198 +++++++++++ .../use_cases/new_york_energy/nysdp.py | 333 +++++++----------- 4 files changed, 355 insertions(+), 251 deletions(-) create mode 100644 sample_data/use_cases/new_york_energy/interpret_network.py diff --git a/sample_data/use_cases/new_york_energy/contexts.json b/sample_data/use_cases/new_york_energy/contexts.json index 9837673e..23d4550f 100644 --- a/sample_data/use_cases/new_york_energy/contexts.json +++ b/sample_data/use_cases/new_york_energy/contexts.json @@ -10,12 +10,7 @@ "National Grid CompanyBoundary", "National Grid Network", "National Grid Substations", - "National Grid DistAssetsOverview", - "National Grid Electrification_Data", - "National Grid EV_Load_Serving_Capacity", - "National Grid Hosting_Capacity_Data", - "National Grid LSRV", - "National Grid NY_SubT_SDP" + "County Boundaries" ] } ] diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json index 53df0031..141c1b0f 100644 --- a/sample_data/use_cases/new_york_energy/datasets.json +++ b/sample_data/use_cases/new_york_energy/datasets.json @@ -2,7 +2,7 @@ { "name": "National Grid CompanyBoundary", "description": "CompanyBoundary vector data", - "category": "energy", + "category": "region", "module": "nysdp", "function": "create_vector_features", "kwargs": { @@ -10,53 +10,34 @@ } }, { - "name": "National Grid Electrification_Data", - "description": "Electrification_Data vector data", - "category": "energy", - "module": "nysdp", - "function": "create_vector_features", - "kwargs": { - "service_name": "Electrification_Data" - } - }, - { - "name": "National Grid EV_Load_Serving_Capacity", - "description": "EV_Load_Serving_Capacity vector data", - "category": "energy", - "module": "nysdp", - "function": "create_vector_features", - "kwargs": { - "service_name": "EV_Load_Serving_Capacity" - } - }, - { - "name": "National Grid Hosting_Capacity_Data", - "description": "Hosting_Capacity_Data vector data", - "category": "energy", - "module": "nysdp", - "function": "create_vector_features", - "kwargs": { - "service_name": "Hosting_Capacity_Data" - } - }, - { - "name": "National Grid LSRV", - "description": "LSRV vector data", - "category": "energy", - "module": "nysdp", - "function": "create_vector_features", - "kwargs": { - "service_name": "LSRV" + "name": "County Boundaries", + "description": "From https://gis.ny.gov/civil-boundaries", + "category": "region", + "type": "vector", + "files": [ + { + "url": "https://data.kitware.com/api/v1/item/66a2a19a5d2551c516b1e502/download", + "path": "nyc/counties.zip" + } + ], + "region_options": { + "name_property": "NAME" + }, 
+        "style_options": {
+            "outline": "white",
+            "palette": [
+                "grey"
+            ]
+        }
     },
     {
         "name": "National Grid Substations",
         "description": "Substations vector data",
         "category": "energy",
         "module": "nysdp",
         "function": "create_vector_features",
         "kwargs": {
             "service_name": "Substations"
         }
     },
     {
         "name": "National Grid Network",
         "description": "Consolidated network from several services' combined data",
         "category": "energy",
         "module": "nysdp",
-        "function": "create_consolidated_network"
+        "function": "create_consolidated_network",
+        "kwargs": {
+            "zones_dataset_name": "County Boundaries"
+        }
     }
 ]
diff --git a/sample_data/use_cases/new_york_energy/interpret_network.py b/sample_data/use_cases/new_york_energy/interpret_network.py
new file mode 100644
index 00000000..49e75ca9
--- /dev/null
+++ b/sample_data/use_cases/new_york_energy/interpret_network.py
@@ -0,0 +1,198 @@
+import json
+import geopandas
+import shapely
+import shapely.ops
+
+from webcolors import name_to_hex
+
+
+TOLERANCE = 0.0001
+
+
+def get_properties(feature):
+    properties = json.loads(
+        feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json()
+    )
+    properties.update(dict(
+        colors=','.join([
+            name_to_hex((properties.get('color') or 'black').replace(' ', '')),
+            '#ffffff'
+        ])
+    ))
+    return properties
+
+
+def merge_properties(p1, p2):
+    properties = {}
+    if p1 is None:
+        return p2
+    if p2 is None:
+        return p1
+    for k1, v1 in p1.items():
+        v2 = p2.get(k1)
+        if v2 is None or v2 == '':
+            properties[k1] = v1
+        else:
+            if v1 is None or v1 == '':
+                properties[k1] = v2
+            else:
+                properties[k1] = ','.join([str(v1), str(v2)])
+    # update p2 with merged properties to catch keys not in p1;
+    # dict.update() returns None, so return the updated dict itself
+    p2.update(properties)
+    return p2
+
+
+def cut_crossed_lines(gdf):
+    # cut lines at any cross points
+    separated_features = []
+    for feat_index, feature in gdf.iterrows():
+        properties = json.loads(
+            feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json()
+        )
+        curr_geom = feature.geometry
+        crosses = gdf[gdf.geometry.crosses(curr_geom)]
+        separated = []
+        if len(crosses) > 0:
+            split_points = []
+            for c_id, cross in crosses.iterrows():
+                p = cross.geometry.intersection(curr_geom)
+                if p.geom_type == 'MultiPoint':
+                    split_points.append(p.geoms[0])
+                elif p.geom_type == 'Point':
+                    split_points.append(p)
+            separated = shapely.ops.split(curr_geom, shapely.MultiPoint(split_points)).geoms
+        else:
+            separated = [feature.geometry]
+        separated_features.extend([
+            dict(
+                type='Feature',
+                geometry=json.loads(shapely.to_geojson(s)),
+                properties=properties
+            )
+            for s in separated
+        ])
+
+    gdf = geopandas.GeoDataFrame.from_features(separated_features)
+    return gdf
+
+
+def merge_lines(gdf):
+    visited = []
+    merged_features = []
+    for feat_index, feature in gdf.iterrows():
+        if feat_index not in visited:
+            visited.append(feat_index)
+            properties = json.loads(
+                feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json()
+            )
+            curr_geom = feature.geometry
+            not_visited = gdf[~gdf.index.isin(visited)]
+            touching = not_visited[not_visited.geometry.touches(curr_geom)]
+            snapped = touching.snap(curr_geom, TOLERANCE * 2)
+            merge = shapely.line_merge(shapely.union_all([
+                *snapped.geometry, curr_geom
+            ]))
+            curr_segment = None
+            if merge.geom_type == 'MultiLineString':
+                for segment in merge.geoms:
+                    if segment.contains(curr_geom) and not any(
+                        s.touches(segment)
+                        for s in merge.geoms
+                        if s != segment
+                    ):
+                        curr_segment = segment
+            elif
merge.geom_type == 'LineString': + curr_segment = merge + + if curr_segment is None: + # no valid merge segment, include feature as-is + merged_features.append(dict( + type='Feature', + geometry=json.loads(shapely.to_geojson(curr_geom)), + properties=properties + )) + else: + # valid merge segment, mark constituent features as visited + visited_indexes = list(snapped[snapped.geometry.within(curr_segment)].index) + visited += visited_indexes + visited_features = gdf.loc[visited_indexes] + for v_index, v_feature in visited_features.iterrows(): + properties = merge_properties(properties, json.loads( + v_feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json() + )) + merged_features.append(dict( + type='Feature', + geometry=json.loads(shapely.to_geojson(curr_segment)), + properties=properties + )) + gdf = geopandas.GeoDataFrame.from_features(merged_features) + return cut_crossed_lines(gdf) + + +def find_nodes(gdf): + nodes = [] + + for feat_index, feature in gdf.iterrows(): + properties = get_properties(feature) + curr_geom = feature.geometry + points = [shapely.Point(*p) for p in curr_geom.coords] + # create nodes at line endpoints + for endpoint in [points[0], points[-1]]: + # touching_lines = gdf[gdf.geometry.snap(endpoint, TOLERANCE).touches(endpoint)] + existing_node_locations = geopandas.GeoSeries([n['location'] for n in nodes]) + if ( + # allow endpoints (1 touching) and intersections (>2 touching) + # (len(touching_lines) == 1 or len(touching_lines) > 2 ) and + # omit duplicates within tolerance radius + not existing_node_locations.dwithin(endpoint, TOLERANCE).any() + ): + nodes.append(dict( + location=endpoint, + metadata=properties + )) + return nodes + + +def find_edges(gdf, nodes): + edges = [] + existing_node_locations = geopandas.GeoSeries([n['location'] for n in nodes]) + for feat_index, feature in gdf.iterrows(): + properties = get_properties(feature) + curr_geom = feature.geometry + points = [shapely.Point(*p) for p in curr_geom.coords] + nearby_nodes = existing_node_locations[existing_node_locations.dwithin(curr_geom, TOLERANCE)] + snapped = shapely.snap(shapely.MultiPoint(list(nearby_nodes.geometry)), curr_geom, TOLERANCE) + separated = shapely.ops.split(shapely.LineString(points), snapped).geoms + existing_edge_geometries = geopandas.GeoSeries([e['line_geometry'] for e in edges]) + for segment in separated: + endpoints = [ + shapely.Point(segment.coords[0]), + shapely.Point(segment.coords[-1]), + ] + from_points = nearby_nodes[nearby_nodes.dwithin(endpoints[0], TOLERANCE)] + to_points = nearby_nodes[nearby_nodes.dwithin(endpoints[1], TOLERANCE)] + if ( + len(from_points) > 0 and + len(to_points) > 0 and + not shapely.snap(existing_edge_geometries, segment, TOLERANCE).covers(segment).any() + ): + edges.append(dict( + line_geometry=segment, + from_point=from_points.iloc[0], + to_point=to_points.iloc[0], + metadata=properties, + )) + return edges + + +def interpret_group(gdf): + print(f'\t\t Reading group with {len(gdf)} features.') + # iteratively merge lines until no more merging can be done + merged_gdf = merge_lines(gdf) + while len(merged_gdf) < len(gdf): + gdf = merged_gdf + merged_gdf = merge_lines(gdf) + print(f'\t\tMerged to {len(gdf)} lines.') + + nodes = find_nodes(gdf) + edges = find_edges(gdf, nodes) + + return nodes, edges diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py index 8cccaa11..d08b22ff 100644 --- a/sample_data/use_cases/new_york_energy/nysdp.py +++ 
b/sample_data/use_cases/new_york_energy/nysdp.py @@ -1,22 +1,25 @@ import json import geopandas -import numpy -import pandas import requests -import shapely + +from datetime import datetime +from pathlib import Path +from concurrent.futures import ThreadPoolExecutor from django.contrib.gis.geos import GEOSGeometry, Point, LineString -from uvdat.core.models import Dataset, Network, NetworkNode, NetworkEdge, VectorMapLayer, VectorFeature +from uvdat.core.models import Dataset, Network, NetworkNode, NetworkEdge, VectorMapLayer, VectorFeature, SourceRegion +from .interpret_network import interpret_group +DOWNLOAD_PATH = Path(__file__).parent.parent NYDSP_URL = 'https://systemdataportal.nationalgrid.com/arcgis/rest/services/NYSDP' -RECORDS_PER_PAGE = 100 +RECORDS_PER_PAGE = 1000 SERVICE_SUFFIX = 'MapServer' FORMAT_SUFFIX = 'f=pjson' QUERY_CONTENT = f'where=1%3D1&returnGeometry=true&outFields=*&resultRecordCount={RECORDS_PER_PAGE}&f=geojson' -def fetch_vector_features(service_name=None): +def fetch_vector_features(service_name=None, **kwargs): feature_sets = {} if service_name is None: return feature_sets @@ -31,16 +34,20 @@ def fetch_vector_features(service_name=None): while feature_page is None or len(feature_page) == RECORDS_PER_PAGE: query_response = requests.get( f"{service_url}/{layer_id}/query?resultOffset={result_offset}&{QUERY_CONTENT}" - ).json() - feature_page = query_response.get('features', []) - feature_set += feature_page - result_offset += RECORDS_PER_PAGE + ) + try: + query_json = query_response.json() + feature_page = query_json.get('features', []) + feature_set += feature_page + result_offset += RECORDS_PER_PAGE + except Exception as e: + print(f'\t\tFailed to get {service_name} data from NYSDP.') if len(feature_set): feature_sets[layer_id] = feature_set return feature_sets -def create_vector_features(dataset, service_name=None): +def create_vector_features(dataset, service_name=None, **kwargs): VectorMapLayer.objects.filter(dataset=dataset).delete() feature_sets = fetch_vector_features(service_name=service_name) @@ -59,13 +66,12 @@ def create_vector_features(dataset, service_name=None): def vector_features_from_network(network): - VectorMapLayer.objects.filter(dataset=network.dataset).delete() - map_layer = VectorMapLayer.objects.create(dataset=network.dataset) + map_layer, created = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0) VectorFeature.objects.bulk_create([ VectorFeature( map_layer=map_layer, geometry=node.location, - properties=node.metadata or {}, + properties=dict(node_id=node.id, **node.metadata), ) for node in network.nodes.all() ]) @@ -73,204 +79,125 @@ def vector_features_from_network(network): VectorFeature( map_layer=map_layer, geometry=edge.line_geometry, - properties=edge.metadata or {}, + properties=dict( + edge_id=edge.id, + from_node_id=edge.from_node.id, + to_node_id=edge.to_node.id, + **edge.metadata, + ), ) for edge in network.edges.all() ]) -def create_consolidated_network(dataset): - # VectorMapLayer.objects.filter(dataset=dataset).delete() - # map_layer = VectorMapLayer.objects.create(dataset=dataset, index=0) - # include_services = [ - # "Substations", - # "DistAssetsOverview", - # "Electrification_Data", - # "EV_Load_Serving_Capacity", - # "Hosting_Capacity_Data", - # "LSRV", - # "NY_SubT_SDP" - # ] - - # features = [] - # for service_name in include_services: - # try: - # d = Dataset.objects.get(name=f'National Grid {service_name}') - # print('\t', f'Using saved vector features for {service_name}...') - # features += [ - 
# dict( - # type='Feature', - # geometry=json.loads(feature.geometry.json), - # properties=feature.properties - # ) - # for feature in VectorFeature.objects.filter(map_layer__dataset=d) - # ] - # except Dataset.DoesNotExist: - # print('\t', f'Querying {service_name}...') - # feature_sets = fetch_vector_features(service_name=service_name) - # for layer_id, feature_set in feature_sets.items(): - # features += feature_set - # print('\t', len(features), 'features found. Combining like geometries...') - # print('features found:', len(features)) - - # Use geopandas to merge properties of duplicate features - # gdf = geopandas.GeoDataFrame.from_features(features) - # gdf["geometry"] = gdf.normalize() - - # grouped = gdf.groupby(gdf['geometry'].to_wkt()).first() - # print('\t', len(grouped), 'consolidated features found. Saving to database...') - - # TODO: color by properties - - # geojson = json.loads(grouped.to_json()) - # VectorFeature.objects.bulk_create([ - # VectorFeature( - # map_layer=map_layer, - # geometry=GEOSGeometry(json.dumps(feature['geometry'])), - # properties=feature['properties'], - # ) - # for feature in geojson.get('features') - # ], batch_size=10000) - - from django.contrib.gis.geos import Polygon +def download_all_deduped_vector_features(**kwargs): + start = datetime.now() + include_services = kwargs.get('include_services', [ + "DistAssetsOverview", + "Electrification_Data", + "EV_Load_Serving_Capacity", + "Hosting_Capacity_Data", + "LSRV", + "NY_SubT_SDP" + ]) + downloads_folder = kwargs.get('downloads_folder') + if downloads_folder is None: + downloads_folder = Path(__file__).parent + else: + downloads_folder = Path(downloads_folder) + filename = downloads_folder / 'nyc' / 'network_basic_features.json' + if filename.exists(): + print('\t\tReading saved file of basic features.') + return geopandas.GeoDataFrame.from_file(filename) + + print('\t\tDownloading basic features from NYSDP.') + feature_sets = None + with ThreadPoolExecutor(max_workers=len(include_services)) as pool: + feature_sets = pool.map(fetch_vector_features, include_services) + + features = [] + for feature_set in feature_sets: + for set_id, feature_set in feature_set.items(): + for feature in feature_set: + properties = feature['properties'] + geometry = GEOSGeometry(json.dumps(feature['geometry'])) + geoms = [] + # split multilinestrings + if geometry.geom_type == 'MultiLineString': + geoms = [LineString(*line) for line in geometry.coords] + elif geometry.geom_type == 'LineString': + geoms = [geometry] + for geom in geoms: + features.append(dict( + type='Feature', + geometry=json.loads(geom.json), + properties=properties + )) + # normalize and eliminate duplicates + gdf = geopandas.GeoDataFrame.from_features(features, crs='EPSG:4326') + gdf["geometry"] = gdf.normalize() + gdf = gdf.groupby(gdf.geometry.to_wkt()).first() + gdf.reset_index(inplace=True) + gdf.to_file(filename) + print(f'\t\tCompleted download in {(datetime.now() - start).total_seconds()} seconds.') + return gdf - test_dataset, created = Dataset.objects.get_or_create( - name="Test Network", - category="energy", - dataset_type="VECTOR" - ) - Network.objects.filter(dataset=test_dataset).delete() - network = Network.objects.create(dataset=test_dataset) - test_bbox = dict(xmin=-74, ymin=42.8, xmax=-73.9, ymax=42.9) - test_area = Polygon.from_bbox( - [test_bbox['xmin'], test_bbox['ymin'], test_bbox['xmax'], test_bbox['ymax']] - ) - features = [ - dict( - type='Feature', - geometry=json.loads(feature.geometry.json), - properties=feature.properties - 
) - for feature in VectorFeature.objects.filter( - map_layer__dataset=dataset, - geometry__coveredby=test_area, - ) +def create_consolidated_network(dataset, **kwargs): + start = datetime.now() + Network.objects.filter(dataset=dataset).delete() + VectorMapLayer.objects.filter(dataset=dataset).delete() + gdf = download_all_deduped_vector_features(**kwargs) + + zones_dataset_name = kwargs.get('zones_dataset_name') + if zones_dataset_name is None: + raise ValueError('`zones_dataset_name` is required.') + zones = SourceRegion.objects.filter(dataset__name=zones_dataset_name) + if zones.count() == 0: + raise ValueError(f'No regions found with dataset name "{zones_dataset_name}".') + + print(f'\t\tInterpreting networks from {len(gdf)} basic features...') + # Divide into groups + zone_geometries = [ + geopandas.GeoSeries.from_wkt([zone.boundary.wkt]).set_crs(4326).iloc[0] + for zone in zones ] - show_columns = [ - 'geometry', + groups = [ + gdf[gdf.geometry.covered_by(zone_geom)] + for zone_geom in zone_geometries ] - pandas.set_option('display.max_columns', None) - print('test features:', len(features)) - gdf = geopandas.GeoDataFrame.from_features(features) - gdf["geometry"] = gdf.normalize() - spatial_index = gdf.sindex - - # construct adjacencies list - adjacencies = {} - for feat_index, feature in gdf.iterrows(): - adjacencies[feat_index] = {} - geom = feature['geometry'] - if geom.geom_type == 'MultiLineString': - # attempt to merge contiguous portions - geom = shapely.ops.linemerge(geom) - geom_list = [geom] - if geom.geom_type == 'MultiLineString': - # if still multi, contains non-contiguous portions - geom_list = geom.geoms - - for geom_index, geom in enumerate(geom_list): - adjacencies[feat_index][geom_index] = {} - # first and last points (endpoints of current geom) - # assumes no intersections will occur in the middle of a line - for coord_index, coord_name in [(0, 'start'), (-1, 'end')]: - coord = shapely.geometry.Point(geom.coords[coord_index]) - nearest_indexes, distances = spatial_index.nearest(coord, max_distance=10, return_distance=True) - geom_inds, tree_inds = nearest_indexes - # exclude self from nearest results - tree_inds = [int(i) for i in tree_inds if i != feat_index] - node = None - if len(tree_inds) != 1: - node = NetworkNode.objects.create( - name=f'{feat_index}_{geom_index}_{coord_name}', - network=network, - location=Point(*geom.coords[coord_index]) - ) - adjacencies[feat_index][geom_index][coord_name] = dict( - node=node, - adjs=tree_inds, - ) - - visited = [] - def follow_adjacencies(feat_index, current_line=None, current_from_node=None): - if feat_index not in visited: - visited.append(feat_index) - feat = gdf.iloc[feat_index] - for geom_index, geom_record in adjacencies[feat_index].items(): - start_record = geom_record.get('start', {}) - start_node = start_record.get('node') - start_adjs = start_record.get('adjs') - - end_record = geom_record.get('end', {}) - end_node = end_record.get('node') - end_adjs = end_record.get('adjs') - coords = [] - if feat['geometry'].geom_type == 'MultiLineString': - for geom in feat['geometry'].geoms: - coords += geom.coords - else: - coords = feat['geometry'].coords - - if start_node and end_node: - # print('edge', start_node, end_node, 'len', len(coords)) - NetworkEdge.objects.create( - name='edge', - network=network, - from_node=start_node, - to_node=end_node, - metadata={}, # TODO: figure out capturing metadata - line_geometry=LineString(*[ - Point(*p) for p in coords - ]) - ) - - if current_line is None: - current_line = 
list(coords) - else: - current_line += list(coords) - - follow_start_results = None - follow_end_results = None - if len(start_adjs) > 0: - for adj in start_adjs: - follow_start_results = follow_adjacencies( - adj, current_line=current_line, current_from_node=current_from_node or end_node - ) - if len(end_adjs) > 0: - for adj in end_adjs: - follow_end_results = follow_adjacencies( - adj, current_line=current_line, current_from_node=current_from_node or start_node - ) - - start_line, start_from_node, end_line, end_from_node = None, None, None, None - if follow_start_results: - start_line, start_from_node = follow_start_results - if follow_end_results: - end_line, end_from_node = follow_end_results - if current_from_node is None: - if start_line is not None and end_line is not None: - print(feat_index, 'combine lines', len(start_line), '+', len(end_lines), 'between', start_from_node, end_from_node) - if start_line is not None: - print(feat_index, 'start line!', start_from_node, end_from_node, current_from_node, start_node, end_node) - if end_line is not None: - print(feat_index, 'end line!', start_from_node, end_from_node, current_from_node, start_node, end_node) - - return current_line, current_from_node + groups = [g for g in groups if len(g) > 0] + print(f'\t\tSeparated into {len(groups)} groups.') + + with ThreadPoolExecutor(max_workers=10) as pool: + results = pool.map(interpret_group, groups) + for result in results: + nodes, edges = result + network = Network.objects.create(dataset=dataset) + NetworkNode.objects.bulk_create([ + NetworkNode( + network=network, + name=f'Node {i}', + location=Point(n.get('location').x, n.get('location').y), + metadata=n.get('metadata', {}) + ) + for i, n in enumerate(nodes) + ], batch_size=1000) + NetworkEdge.objects.bulk_create([ + NetworkEdge( + network=network, + name=f'Edge {i}', + from_node=NetworkNode.objects.get(network=network, location=Point(e.get('from_point').x, e.get('from_point').y)), + to_node=NetworkNode.objects.get(network=network, location=Point(e.get('to_point').x, e.get('to_point').y)), + line_geometry=LineString(*e.get('line_geometry').coords), + metadata=e.get('metadata', {}) + ) + for i, e in enumerate(edges) + ], batch_size=1000) - for feat_index in list(adjacencies.keys())[:5]: - follow_adjacencies(feat_index) - print('total nodes', NetworkNode.objects.filter(network=network).count()) - print('total edges', NetworkEdge.objects.filter(network=network).count()) + print(f'\t\t{network.nodes.count()} nodes created, {network.edges.count()} edges created.') + vector_features_from_network(network) - vector_features_from_network(network) + print(f'\t\t{dataset.networks.count()} separate networks created.') + print(f'\tCompleted in {(datetime.now() - start).total_seconds()} seconds.') From 478f58a1f2d2e2b757a4058e4a14679e7ee7203a Mon Sep 17 00:00:00 2001 From: Anne Haley Date: Mon, 29 Jul 2024 13:39:58 +0000 Subject: [PATCH 06/19] fix: Small bug fixes --- sample_data/ingest_sample_data.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sample_data/ingest_sample_data.py b/sample_data/ingest_sample_data.py index 3292b6d4..7fae18de 100644 --- a/sample_data/ingest_sample_data.py +++ b/sample_data/ingest_sample_data.py @@ -12,6 +12,7 @@ USE_CASE_FOLDER = Path('sample_data/use_cases') +DOWNLOADS_FOLDER = Path('sample_data/downloads') def ingest_file(file_info, index=0, dataset=None, chart=None): @@ -20,7 +21,7 @@ def ingest_file(file_info, index=0, dataset=None, chart=None): file_url = file_info.get('url') file_metadata = 
file_info.get('metadata', {})
 
-    file_location = Path('sample_data/downloads', file_path)
+    file_location = Path(DOWNLOADS_FOLDER, file_path)
     file_type = file_path.split('.')[-1]
     if not file_location.exists():
         print(f'\t Downloading data file {file_name}.')

From 0bb90a432686168797fbde5c5a4f3ba935208027 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 29 Jul 2024 18:13:41 +0000
Subject: [PATCH 07/19] feat: Add import script to load county networks from exported geojson files

---
 .../use_cases/new_york_energy/datasets.json   | 17 ++--
 .../new_york_energy/export_networks.py        | 55 +++++++++++
 .../new_york_energy/import_networks.py        | 91 +++++++++++++++++++
 .../use_cases/new_york_energy/nysdp.py        | 27 +-----
 uvdat/core/tasks/networks.py                  | 28 +++++-
 5 files changed, 184 insertions(+), 34 deletions(-)
 create mode 100644 sample_data/use_cases/new_york_energy/export_networks.py
 create mode 100644 sample_data/use_cases/new_york_energy/import_networks.py

diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json
index 141c1b0f..741f0236 100644
--- a/sample_data/use_cases/new_york_energy/datasets.json
+++ b/sample_data/use_cases/new_york_energy/datasets.json
@@ -41,13 +41,16 @@
         }
     },
     {
-        "name": "National Grid Network",
-        "description": "Consolidated network from several services' combined data",
+        "name": "National Grid County Networks",
+        "description": "Energy networks for each county in New York",
         "category": "energy",
-        "module": "nysdp",
-        "function": "create_consolidated_network",
-        "kwargs": {
-            "zones_dataset_name": "County Boundaries"
-        }
+        "files": [
+            {
+                "url": "https://data.kitware.com/api/v1/item/66a7bdab0ea2cce8e698b958/download",
+                "path": "nyc/networks.zip"
+            }
+        ],
+        "module": "import_networks",
+        "function": "perform_import"
     }
 ]
diff --git a/sample_data/use_cases/new_york_energy/export_networks.py b/sample_data/use_cases/new_york_energy/export_networks.py
new file mode 100644
index 00000000..83918540
--- /dev/null
+++ b/sample_data/use_cases/new_york_energy/export_networks.py
@@ -0,0 +1,55 @@
+import json
+import shapely.geometry
+import shapely.wkt
+
+from datetime import datetime
+from pathlib import Path
+
+from uvdat.core.models import Network, SourceRegion
+
+
+OUTPUT_FOLDER = Path('sample_data/use_cases/new_york_energy/networks')
+
+
+def perform_export():
+    start = datetime.now()
+    # make sure the output folder exists before writing any files
+    OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)
+    networks = Network.objects.filter(dataset__name='National Grid Network')
+    zones = SourceRegion.objects.filter(dataset__name='County Boundaries')
+
+    for network in networks:
+        sample_node = network.nodes.first()
+        zone = zones.filter(boundary__contains=sample_node.location).first()
+        if zone:
+            features = []
+            for node in network.nodes.all():
+                geom = shapely.geometry.mapping(shapely.wkt.loads(node.location.wkt))
+                features.append(dict(
+                    geometry=geom,
+                    properties=dict(
+                        **node.metadata,
+                        county=zone.name
+                    )
+                ))
+            for edge in network.edges.all():
+                geom = shapely.geometry.mapping(shapely.wkt.loads(edge.line_geometry.wkt))
+                from_point = shapely.geometry.mapping(shapely.wkt.loads(edge.from_node.location.wkt))
+                to_point = shapely.geometry.mapping(shapely.wkt.loads(edge.to_node.location.wkt))
+                features.append(dict(
+                    geometry=geom,
+                    properties=dict(
+                        **edge.metadata,
+                        from_point=from_point,
+                        to_point=to_point,
+                        county=zone.name
+                    )
+                ))
+            geodata = dict(
+                type="FeatureCollection",
+                crs=dict(type='name', properties=dict(name='EPSG:4326')),
+                features=features,
+            )
+            filename = OUTPUT_FOLDER / f'{zone.name}.json'
+            with open(filename, 'w') as f:
diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py
new file mode 100644
index 00000000..88798f7d
--- /dev/null
+++ b/sample_data/use_cases/new_york_energy/import_networks.py
@@ -0,0 +1,91 @@
+import json
+import tempfile
+import zipfile
+
+from datetime import datetime
+from pathlib import Path
+
+from django.contrib.gis.measure import D
+from django.contrib.gis.db.models.functions import Distance
+from django.contrib.gis.geos import Point, LineString
+from uvdat.core.models import Network, NetworkEdge, NetworkNode
+from uvdat.core.tasks.networks import vector_features_from_network
+
+
+TOLERANCE_METERS = 15
+
+
+def create_network(dataset, network_name, geodata):
+    print(f'\t\tCreating network for {network_name}.')
+    network = Network.objects.create(
+        dataset=dataset,
+        category='energy',
+        metadata=dict(name=network_name)
+    )
+    features = geodata.get('features')
+    nodes = []
+    edges = []
+    for feature in features:
+        geom = feature.get('geometry')
+        if geom.get('type') == 'Point':
+            nodes.append(NetworkNode(
+                name=f'{network_name} {len(nodes)}',
+                network=network,
+                location=Point(*geom.get('coordinates')),
+                metadata=feature.get('properties'),
+            ))
+        elif geom.get('type') == 'LineString':
+            edges.append(NetworkEdge(
+                name=f'{network_name} {len(edges)}',
+                network=network,
+                line_geometry=LineString(*geom.get('coordinates')),
+                metadata=feature.get('properties'),
+            ))
+    NetworkNode.objects.bulk_create(nodes, batch_size=1000)
+
+    # fill in node relationships on edges now that nodes exist
+    connected_node_ids = []
+    for edge in edges:
+        metadata = edge.metadata
+        from_point = metadata.get('from_point', {}).get('coordinates')
+        to_point = metadata.get('to_point', {}).get('coordinates')
+        if from_point is not None and to_point is not None:
+            from_point = Point(*from_point)
+            to_point = Point(*to_point)
+            from_nodes = NetworkNode.objects.filter(network=network, location=from_point)
+            to_nodes = NetworkNode.objects.filter(network=network, location=to_point)
+            if from_nodes.count() > 0 and to_nodes.count() > 0:
+                edge.from_node = from_nodes.first()
+                edge.to_node = to_nodes.first()
+                if edge.from_node.id not in connected_node_ids:
+                    connected_node_ids.append(edge.from_node.id)
+                if edge.to_node.id not in connected_node_ids:
+                    connected_node_ids.append(edge.to_node.id)
+
+    # remove any nodes that have no connections
+    not_connected = NetworkNode.objects.filter(network=network).exclude(id__in=connected_node_ids)
+    not_connected.delete()
+
+    NetworkEdge.objects.bulk_create(edges, batch_size=1000)
+    vector_features_from_network(network)
+    print(f'\t\tCreated {network.nodes.count()} nodes and {network.edges.count()} edges.')
+
+
+def perform_import(dataset, **kwargs):
+    print('\tEstimated time: 90 minutes.')
+    start = datetime.now()
+    Network.objects.filter(dataset=dataset).delete()
+    for file_item in dataset.source_files.all():
+        with tempfile.TemporaryDirectory() as temp_dir:
+            archive_path = Path(temp_dir, 'archive.zip')
+            with open(archive_path, 'wb') as archive_file:
+                archive_file.write(file_item.file.open('rb').read())
+            with zipfile.ZipFile(archive_path) as zip_archive:
+                filenames = zip_archive.namelist()
+                for filename in filenames:
+                    if filename.endswith('.json'):
+                        network_name = filename.split('/')[-1].replace('.json', '')
+                        content = zip_archive.open(filename).read()
+                        geodata = json.loads(content)
+                        create_network(dataset, network_name, geodata)
+    print(f'\tCompleted in {(datetime.now() - start).total_seconds()} seconds.')
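TOLERANCE_METERS, D, and Distance are declared or imported above but unused: endpoint matching relies on exact location=from_point equality, which holds only while exported coordinates round-trip unchanged. If fuzzy matching were ever needed, a distance-bounded lookup is one option; this is a sketch only, assuming a PostGIS backend where this lookup is supported on the location field:

    # Sketch: nearest node within TOLERANCE_METERS of the edge endpoint.
    candidates = (
        NetworkNode.objects.filter(network=network)
        .filter(location__distance_lte=(from_point, D(m=TOLERANCE_METERS)))
        .annotate(distance=Distance('location', from_point))
        .order_by('distance')
    )
    edge.from_node = candidates.first()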
diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py
index d08b22ff..fc69324b 100644
--- a/sample_data/use_cases/new_york_energy/nysdp.py
+++ b/sample_data/use_cases/new_york_energy/nysdp.py
@@ -8,6 +8,7 @@
 from django.contrib.gis.geos import GEOSGeometry, Point, LineString

 from uvdat.core.models import Dataset, Network, NetworkNode, NetworkEdge, VectorMapLayer, VectorFeature, SourceRegion
+from uvdat.core.tasks.networks import vector_features_from_network

 from .interpret_network import interpret_group
@@ -65,31 +66,6 @@ def create_vector_features(dataset, service_name=None, **kwargs):
     VectorFeature.objects.bulk_create(vector_features)


-def vector_features_from_network(network):
-    map_layer, created = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0)
-    VectorFeature.objects.bulk_create([
-        VectorFeature(
-            map_layer=map_layer,
-            geometry=node.location,
-            properties=dict(node_id=node.id, **node.metadata),
-        )
-        for node in network.nodes.all()
-    ])
-    VectorFeature.objects.bulk_create([
-        VectorFeature(
-            map_layer=map_layer,
-            geometry=edge.line_geometry,
-            properties=dict(
-                edge_id=edge.id,
-                from_node_id=edge.from_node.id,
-                to_node_id=edge.to_node.id,
-                **edge.metadata,
-            ),
-        )
-        for edge in network.edges.all()
-    ])
-
-
 def download_all_deduped_vector_features(**kwargs):
     start = datetime.now()
     include_services = kwargs.get('include_services', [
@@ -147,7 +123,6 @@ def download_all_deduped_vector_features(**kwargs):
 def create_consolidated_network(dataset, **kwargs):
     start = datetime.now()
     Network.objects.filter(dataset=dataset).delete()
-    VectorMapLayer.objects.filter(dataset=dataset).delete()

     gdf = download_all_deduped_vector_features(**kwargs)
     zones_dataset_name = kwargs.get('zones_dataset_name')

diff --git a/uvdat/core/tasks/networks.py b/uvdat/core/tasks/networks.py
index b742d2ce..ec34f97d 100644
--- a/uvdat/core/tasks/networks.py
+++ b/uvdat/core/tasks/networks.py
@@ -6,7 +6,7 @@
 import numpy
 import shapely

-from uvdat.core.models import Network, NetworkEdge, NetworkNode
+from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorMapLayer, VectorFeature

 NODE_RECOVERY_MODES = [
     'random',
@@ -213,6 +213,32 @@ def geojson_from_network(dataset):
     return new_geodata.to_json()


+def vector_features_from_network(network):
+    VectorMapLayer.objects.filter(dataset=network.dataset).delete()
+    map_layer, created = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0)
+    VectorFeature.objects.bulk_create([
+        VectorFeature(
+            map_layer=map_layer,
+            geometry=node.location,
+            properties=dict(node_id=node.id, **node.metadata),
+        )
+        for node in network.nodes.all()
+    ])
+    VectorFeature.objects.bulk_create([
+        VectorFeature(
+            map_layer=map_layer,
+            geometry=edge.line_geometry,
+            properties=dict(
+                edge_id=edge.id,
+                from_node_id=edge.from_node.id,
+                to_node_id=edge.to_node.id,
+                **edge.metadata,
+            ),
+        )
+        for edge in network.edges.all()
+    ])
+
+
 def get_network_graph(network):
     from uvdat.core.models import NetworkEdge, NetworkNode
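With this patch, vector_features_from_network lives in uvdat/core/tasks/networks.py and materializes one VectorFeature per node and per edge on a single dataset-level VectorMapLayer (index 0). A minimal sanity check, sketched for a test context with these models imported:

    vector_features_from_network(network)
    layer = VectorMapLayer.objects.get(dataset=network.dataset, index=0)
    # One feature per node plus one per edge.
    count = VectorFeature.objects.filter(map_layer=layer).count()
    assert count == network.nodes.count() + network.edges.count()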
From 0f985c96452d0fa834b371f990b045af85e4d29c Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 29 Jul 2024 18:34:48 +0000
Subject: [PATCH 08/19] style: Reformat with tox

---
 .../use_cases/new_york_energy/contexts.json |  2 +-
 uvdat/core/tasks/networks.py                | 48 ++++++++++---------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/sample_data/use_cases/new_york_energy/contexts.json b/sample_data/use_cases/new_york_energy/contexts.json
index 23d4550f..d58f67e5 100644
--- a/sample_data/use_cases/new_york_energy/contexts.json
+++ b/sample_data/use_cases/new_york_energy/contexts.json
@@ -8,7 +8,7 @@
         "default_map_zoom": 8,
         "datasets": [
             "National Grid CompanyBoundary",
-            "National Grid Network",
+            "National Grid County Networks",
             "National Grid Substations",
             "County Boundaries"
         ]

diff --git a/uvdat/core/tasks/networks.py b/uvdat/core/tasks/networks.py
index ec34f97d..2c2ec9e4 100644
--- a/uvdat/core/tasks/networks.py
+++ b/uvdat/core/tasks/networks.py
@@ -6,7 +6,7 @@
 import numpy
 import shapely

-from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorMapLayer, VectorFeature
+from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorFeature, VectorMapLayer

 NODE_RECOVERY_MODES = [
     'random',
@@ -216,27 +216,31 @@ def geojson_from_network(dataset):
 def vector_features_from_network(network):
     VectorMapLayer.objects.filter(dataset=network.dataset).delete()
     map_layer, created = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0)
-    VectorFeature.objects.bulk_create([
-        VectorFeature(
-            map_layer=map_layer,
-            geometry=node.location,
-            properties=dict(node_id=node.id, **node.metadata),
-        )
-        for node in network.nodes.all()
-    ])
-    VectorFeature.objects.bulk_create([
-        VectorFeature(
-            map_layer=map_layer,
-            geometry=edge.line_geometry,
-            properties=dict(
-                edge_id=edge.id,
-                from_node_id=edge.from_node.id,
-                to_node_id=edge.to_node.id,
-                **edge.metadata,
-            ),
-        )
-        for edge in network.edges.all()
-    ])
+    VectorFeature.objects.bulk_create(
+        [
+            VectorFeature(
+                map_layer=map_layer,
+                geometry=node.location,
+                properties=dict(node_id=node.id, **node.metadata),
+            )
+            for node in network.nodes.all()
+        ]
+    )
+    VectorFeature.objects.bulk_create(
+        [
+            VectorFeature(
+                map_layer=map_layer,
+                geometry=edge.line_geometry,
+                properties=dict(
+                    edge_id=edge.id,
+                    from_node_id=edge.from_node.id,
+                    to_node_id=edge.to_node.id,
+                    **edge.metadata,
+                ),
+            )
+            for edge in network.edges.all()
+        ]
+    )


 def get_network_graph(network):

From d71c90a73d9ff28ab4adddfc2f5478bf01efa904 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 29 Jul 2024 18:37:46 +0000
Subject: [PATCH 09/19] fix: only delete old map layers at beginning of
 dataset ingest

---
 sample_data/use_cases/new_york_energy/import_networks.py | 3 ++-
 sample_data/use_cases/new_york_energy/nysdp.py           | 1 +
 uvdat/core/tasks/networks.py                             | 3 +--
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py
index 88798f7d..89b4298e 100644
--- a/sample_data/use_cases/new_york_energy/import_networks.py
+++ b/sample_data/use_cases/new_york_energy/import_networks.py
@@ -8,7 +8,7 @@
 from django.contrib.gis.measure import D
 from django.contrib.gis.db.models.functions import Distance
 from django.contrib.gis.geos import Point, LineString
-from uvdat.core.models import Network, NetworkEdge, NetworkNode
+from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorMapLayer
 from uvdat.core.tasks.networks import vector_features_from_network
@@ -75,6 +75,7 @@ def perform_import(dataset, **kwargs):
     print('\tEstimated time: 90 minutes.')
     start = datetime.now()
     Network.objects.filter(dataset=dataset).delete()
+    VectorMapLayer.objects.filter(dataset=network.dataset).delete()
     for file_item in dataset.source_files.all():
         with tempfile.TemporaryDirectory() as temp_dir:
             archive_path = Path(temp_dir, 'archive.zip')

diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py
index fc69324b..7fb7217a 100644
--- a/sample_data/use_cases/new_york_energy/nysdp.py
+++ b/sample_data/use_cases/new_york_energy/nysdp.py
@@ -123,6 +123,7 @@ def download_all_deduped_vector_features(**kwargs):
 def create_consolidated_network(dataset, **kwargs):
     start = datetime.now()
     Network.objects.filter(dataset=dataset).delete()
+    VectorMapLayer.objects.filter(dataset=dataset).delete()

     gdf = download_all_deduped_vector_features(**kwargs)
     zones_dataset_name = kwargs.get('zones_dataset_name')

diff --git a/uvdat/core/tasks/networks.py b/uvdat/core/tasks/networks.py
index 2c2ec9e4..48ceb845 100644
--- a/uvdat/core/tasks/networks.py
+++ b/uvdat/core/tasks/networks.py
@@ -6,7 +6,7 @@
 import numpy
 import shapely

-from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorFeature, VectorMapLayer
+from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorFeature
@@ -214,7 +214,6 @@ def geojson_from_network(dataset):
 def vector_features_from_network(network):
-    VectorMapLayer.objects.filter(dataset=network.dataset).delete()
     map_layer, created = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0)
     VectorFeature.objects.bulk_create(
         [

From 3f180067ba6ee44ff9c0f7a9d6ac06840180d09e Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 29 Jul 2024 18:41:34 +0000
Subject: [PATCH 10/19] style: Additional style fixes

---
 uvdat/core/tasks/networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uvdat/core/tasks/networks.py b/uvdat/core/tasks/networks.py
index 48ceb845..325e8958 100644
--- a/uvdat/core/tasks/networks.py
+++ b/uvdat/core/tasks/networks.py
@@ -6,7 +6,7 @@
 import numpy
 import shapely

-from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorFeature
+from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorFeature, VectorMapLayer

 NODE_RECOVERY_MODES = [
     'random',

From f859588f1e5d4142dec8c2c40db4974757ed48f4 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Wed, 31 Jul 2024 16:26:00 +0000
Subject: [PATCH 11/19] fix: `network.dataset` -> `dataset`

---
 sample_data/use_cases/new_york_energy/import_networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py
index 89b4298e..50bc30e9 100644
--- a/sample_data/use_cases/new_york_energy/import_networks.py
+++ b/sample_data/use_cases/new_york_energy/import_networks.py
@@ -75,7 +75,7 @@ def perform_import(dataset, **kwargs):
     print('\tEstimated time: 90 minutes.')
     start = datetime.now()
     Network.objects.filter(dataset=dataset).delete()
-    VectorMapLayer.objects.filter(dataset=network.dataset).delete()
+    VectorMapLayer.objects.filter(dataset=dataset).delete()
     for file_item in dataset.source_files.all():
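Taken together, patches 09 through 11 move the VectorMapLayer cleanup out of vector_features_from_network: the helper runs once per network, and a dataset such as the county networks holds many networks, so deleting the dataset's layers inside the helper would discard the features already generated for earlier networks in the same ingest. The resulting ordering, sketched with names from the diffs above (county_files is a hypothetical iterable):

    Network.objects.filter(dataset=dataset).delete()
    VectorMapLayer.objects.filter(dataset=dataset).delete()  # clear once, up front
    for network_name, geodata in county_files:
        create_network(dataset, network_name, geodata)  # appends VectorFeatures per network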
From fb2cb073be55d6fd22e78376fc22978efc660c60 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Fri, 2 Aug 2024 16:38:40 +0000
Subject: [PATCH 12/19] refactor: remove unnecessary string casting

---
 sample_data/ingest_sample_data.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/sample_data/ingest_sample_data.py b/sample_data/ingest_sample_data.py
index 7fae18de..63d64806 100644
--- a/sample_data/ingest_sample_data.py
+++ b/sample_data/ingest_sample_data.py
@@ -53,7 +53,7 @@ def ingest_file(file_info, index=0, dataset=None, chart=None):

 def ingest_contexts(use_case):
     print('Creating Context objects...')
-    with open(str(USE_CASE_FOLDER / use_case / 'contexts.json')) as contexts_json:
+    with open(USE_CASE_FOLDER / use_case / 'contexts.json') as contexts_json:
         data = json.load(contexts_json)
         for context in data:
             print('\t- ', context['name'])
@@ -73,7 +73,7 @@ def ingest_charts(use_case):
     print('Creating Chart objects...')
-    with open(str(USE_CASE_FOLDER / use_case / 'charts.json')) as charts_json:
+    with open(USE_CASE_FOLDER / use_case / 'charts.json') as charts_json:
         data = json.load(charts_json)
         for chart in data:
             print('\t- ', chart['name'])
@@ -107,7 +107,7 @@ def ingest_charts(use_case):

 def ingest_datasets(use_case, include_large=False, dataset_indexes=None):
     print('Creating Dataset objects...')
-    with open(str(USE_CASE_FOLDER / use_case / 'datasets.json')) as datasets_json:
+    with open(USE_CASE_FOLDER / use_case / 'datasets.json') as datasets_json:
         data = json.load(datasets_json)
         for index, dataset in enumerate(data):
             if dataset_indexes is None or index in dataset_indexes:

From 48a769d0511dee3dbf929b779ca198885d00b15c Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Fri, 2 Aug 2024 17:45:23 +0000
Subject: [PATCH 13/19] refactor: create ingest modules with `convert_dataset`
 function for each use case

---
 sample_data/ingest_sample_data.py             | 149 ----------------
 sample_data/ingest_use_case.py                | 166 ++++++++++++++++++
 sample_data/use_cases/boston_floods/ingest.py |   9 +
 .../use_cases/new_york_energy/charts.json     |   1 -
 .../use_cases/new_york_energy/datasets.json   |   4 +-
 .../use_cases/new_york_energy/ingest.py       |  27 +++
 uvdat/core/management/commands/populate.py    |  11 +-
 7 files changed, 207 insertions(+), 160 deletions(-)
 delete mode 100644 sample_data/ingest_sample_data.py
 create mode 100644 sample_data/ingest_use_case.py
 create mode 100644 sample_data/use_cases/boston_floods/ingest.py
 delete mode 100644 sample_data/use_cases/new_york_energy/charts.json
 create mode 100644 sample_data/use_cases/new_york_energy/ingest.py

diff --git a/sample_data/ingest_sample_data.py b/sample_data/ingest_sample_data.py
deleted file mode 100644
index 63d64806..00000000
--- a/sample_data/ingest_sample_data.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from datetime import datetime
-import importlib
-import json
-import os
-from pathlib import Path
-
-from django.contrib.gis.geos import Point
-from django.core.files.base import ContentFile
-import requests
-
-from uvdat.core.models import Chart, Context, Dataset, FileItem
-
-
-USE_CASE_FOLDER = Path('sample_data/use_cases')
-DOWNLOADS_FOLDER = Path('sample_data/downloads')
-
-
-def ingest_file(file_info, index=0, dataset=None, chart=None):
-    file_path = file_info.get('path')
-    file_name = file_info.get('name', file_path.split('/')[-1])
-    file_url = file_info.get('url')
-    file_metadata = file_info.get('metadata', {})
-
-    file_location = Path(DOWNLOADS_FOLDER, file_path)
-    file_type = file_path.split('.')[-1]
-    if not file_location.exists():
-        print(f'\t Downloading data file {file_name}.')
-        file_location.parent.mkdir(parents=True, exist_ok=True)
-        with open(file_location, 'wb') as f:
-            r = requests.get(file_url)
-            f.write(r.content)
-
-    existing = FileItem.objects.filter(name=file_name)
-    if existing.count():
-        print('\t', f'FileItem {file_name} already exists.')
-    else:
-        new_file_item = FileItem.objects.create(
-            name=file_name,
-            dataset=dataset,
-            chart=chart,
-            file_type=file_type,
-            file_size=os.path.getsize(file_location),
-            metadata=dict(
-                **file_metadata,
-                uploaded=str(datetime.now()),
-            ),
-            index=index,
-        )
-        print('\t', f'FileItem {new_file_item.name} created.')
-        with file_location.open('rb') as f:
-            new_file_item.file.save(file_path, ContentFile(f.read()))
-
-
-def ingest_contexts(use_case):
-    print('Creating Context objects...')
-    with open(USE_CASE_FOLDER / use_case / 'contexts.json') as contexts_json:
-        data = json.load(contexts_json)
-        for context in data:
-            print('\t- ', context['name'])
-            existing = Context.objects.filter(name=context['name'])
-            if existing.count():
-                context_for_setting = existing.first()
-            else:
-                context_for_setting = Context.objects.create(
-                    name=context['name'],
-                    default_map_center=Point(*context['default_map_center']),
-                    default_map_zoom=context['default_map_zoom'],
-                )
-                print('\t', f'Context {context_for_setting.name} created.')
-
-            context_for_setting.datasets.set(Dataset.objects.filter(name__in=context['datasets']))
-
-
-def ingest_charts(use_case):
-    print('Creating Chart objects...')
-    with open(USE_CASE_FOLDER / use_case / 'charts.json') as charts_json:
-        data = json.load(charts_json)
-        for chart in data:
-            print('\t- ', chart['name'])
-            existing = Chart.objects.filter(name=chart['name'])
-            if existing.count():
-                chart_for_conversion = existing.first()
-            else:
-                new_chart = Chart.objects.create(
-                    name=chart['name'],
-                    description=chart['description'],
-                    context=Context.objects.get(name=chart['context']),
-                    chart_options=chart.get('chart_options'),
-                    metadata=chart.get('metadata'),
-                    editable=chart.get('editable', False),
-                )
-                print('\t', f'Chart {new_chart.name} created.')
-                for index, file_info in enumerate(chart.get('files', [])):
-                    ingest_file(
-                        file_info,
-                        index=index,
-                        chart=new_chart,
-                    )
-                chart_for_conversion = new_chart
-
-            print('\t', f'Converting data for {chart_for_conversion.name}...')
-            chart_for_conversion.spawn_conversion_task(
-                conversion_options=chart.get('conversion_options'),
-                asynchronous=False,
-            )
-
-
-def ingest_datasets(use_case, include_large=False, dataset_indexes=None):
-    print('Creating Dataset objects...')
-    with open(USE_CASE_FOLDER / use_case / 'datasets.json') as datasets_json:
-        data = json.load(datasets_json)
-        for index, dataset in enumerate(data):
-            if dataset_indexes is None or index in dataset_indexes:
-                print('\t- ', dataset['name'])
-                existing = Dataset.objects.filter(name=dataset['name'])
-                if existing.count():
-                    dataset_for_conversion = existing.first()
-                else:
-                    # Create dataset
-                    new_dataset = Dataset.objects.create(
-                        name=dataset['name'],
-                        description=dataset['description'],
-                        category=dataset['category'],
-                        dataset_type=dataset.get('type', 'vector').upper(),
-                        metadata=dataset.get('metadata', {}),
-                    )
-                    print('\t', f'Dataset {new_dataset.name} created.')
-                    for index, file_info in enumerate(dataset.get('files', [])):
-                        ingest_file(
-                            file_info,
-                            index=index,
-                            dataset=new_dataset,
-                        )
-                    dataset_for_conversion = new_dataset
-
-                dataset_size_mb = dataset_for_conversion.get_size() >> 20
-                if include_large or dataset_size_mb < 50:
-                    print('\t', f'Converting data for {dataset_for_conversion.name}...')
-                    dataset_for_conversion.spawn_conversion_task(
-                        style_options=dataset.get('style_options'),
-                        network_options=dataset.get('network_options'),
-                        region_options=dataset.get('region_options'),
-                        asynchronous=False,
-                    )
-                else:
-                    print(
-                        '\t', f'Dataset too large ({dataset_size_mb} MB); skipping conversion step.'
-                    )
-                    print('\t', 'Use `--include_large` to include conversions for large datasets.')

diff --git a/sample_data/ingest_use_case.py b/sample_data/ingest_use_case.py
new file mode 100644
index 00000000..f8dc8b4c
--- /dev/null
+++ b/sample_data/ingest_use_case.py
@@ -0,0 +1,166 @@
+from datetime import datetime
+import importlib
+import json
+import os
+from pathlib import Path
+
+from django.contrib.gis.geos import Point
+from django.core.files.base import ContentFile
+import requests
+
+from uvdat.core.models import Chart, Context, Dataset, FileItem
+
+from .use_cases.boston_floods import ingest as boston_floods_ingest
+from .use_cases.new_york_energy import ingest as new_york_energy_ingest
+
+
+USE_CASE_FOLDER = Path('sample_data/use_cases')
+DOWNLOADS_FOLDER = Path('sample_data/downloads')
+
+
+def ingest_file(file_info, index=0, dataset=None, chart=None):
+    file_path = file_info.get('path')
+    file_name = file_info.get('name', file_path.split('/')[-1])
+    file_url = file_info.get('url')
+    file_metadata = file_info.get('metadata', {})
+
+    file_location = Path(DOWNLOADS_FOLDER, file_path)
+    file_type = file_path.split('.')[-1]
+    if not file_location.exists():
+        print(f'\t Downloading data file {file_name}.')
+        file_location.parent.mkdir(parents=True, exist_ok=True)
+        with open(file_location, 'wb') as f:
+            r = requests.get(file_url)
+            f.write(r.content)
+
+    existing = FileItem.objects.filter(name=file_name)
+    if existing.count():
+        print('\t', f'FileItem {file_name} already exists.')
+    else:
+        new_file_item = FileItem.objects.create(
+            name=file_name,
+            dataset=dataset,
+            chart=chart,
+            file_type=file_type,
+            file_size=os.path.getsize(file_location),
+            metadata=dict(
+                **file_metadata,
+                uploaded=str(datetime.now()),
+            ),
+            index=index,
+        )
+        print('\t', f'FileItem {new_file_item.name} created.')
+        with file_location.open('rb') as f:
+            new_file_item.file.save(file_path, ContentFile(f.read()))
+
+
+def ingest_contexts(use_case):
+    context_file_path = USE_CASE_FOLDER / use_case / 'contexts.json'
+    if context_file_path.exists():
+        print('Creating Context objects...')
+        with open(context_file_path) as contexts_json:
+            data = json.load(contexts_json)
+            for context in data:
+                print('\t- ', context['name'])
+                existing = Context.objects.filter(name=context['name'])
+                if existing.count():
+                    context_for_setting = existing.first()
+                else:
+                    context_for_setting = Context.objects.create(
+                        name=context['name'],
+                        default_map_center=Point(*context['default_map_center']),
+                        default_map_zoom=context['default_map_zoom'],
+                    )
+                    print('\t', f'Context {context_for_setting.name} created.')
+
+                context_for_setting.datasets.set(Dataset.objects.filter(name__in=context['datasets']))
+
+
+def ingest_charts(use_case):
+    chart_file_path = USE_CASE_FOLDER / use_case / 'charts.json'
+    if chart_file_path.exists():
+        print('Creating Chart objects...')
+        with open(chart_file_path) as charts_json:
+            data = json.load(charts_json)
+            for chart in data:
+                print('\t- ', chart['name'])
+                existing = Chart.objects.filter(name=chart['name'])
+                if existing.count():
+                    chart_for_conversion = existing.first()
+                else:
+                    new_chart = Chart.objects.create(
+                        name=chart['name'],
+                        description=chart['description'],
+                        context=Context.objects.get(name=chart['context']),
+                        chart_options=chart.get('chart_options'),
+                        metadata=chart.get('metadata'),
+                        editable=chart.get('editable', False),
+                    )
+                    print('\t', f'Chart {new_chart.name} created.')
+                    for index, file_info in enumerate(chart.get('files', [])):
+                        ingest_file(
+                            file_info,
+                            index=index,
+                            chart=new_chart,
+                        )
+                    chart_for_conversion = new_chart
+
+                print('\t', f'Converting data for {chart_for_conversion.name}...')
+                chart_for_conversion.spawn_conversion_task(
+                    conversion_options=chart.get('conversion_options'),
+                    asynchronous=False,
+                )
+
+
+def ingest_datasets(use_case, include_large=False, dataset_indexes=None):
+    dataset_file_path = USE_CASE_FOLDER / use_case / 'datasets.json'
+    if dataset_file_path.exists():
+        print('Creating Dataset objects...')
+        with open(dataset_file_path) as datasets_json:
+            data = json.load(datasets_json)
+            for index, dataset in enumerate(data):
+                if dataset_indexes is None or index in dataset_indexes:
+                    existing = Dataset.objects.filter(name=dataset['name'])
+                    if existing.count():
+                        dataset_for_conversion = existing.first()
+                    else:
+                        # Create dataset
+                        new_dataset = Dataset.objects.create(
+                            name=dataset['name'],
+                            description=dataset['description'],
+                            category=dataset['category'],
+                            dataset_type=dataset.get('type', 'vector').upper(),
+                            metadata=dataset.get('metadata', {}),
+                        )
+                        print('\t', f'Dataset {new_dataset.name} created.')
+                        for index, file_info in enumerate(dataset.get('files', [])):
+                            ingest_file(
+                                file_info,
+                                index=index,
+                                dataset=new_dataset,
+                            )
+                        dataset_for_conversion = new_dataset
+
+                    dataset_size_mb = dataset_for_conversion.get_size() >> 20
+                    if include_large or dataset_size_mb < 50:
+                        if use_case == 'boston_floods':
+                            boston_floods_ingest.convert_dataset(dataset_for_conversion, dataset)
+                        elif use_case == 'new_york_energy':
+                            new_york_energy_ingest.convert_dataset(dataset_for_conversion, dataset)
+                    else:
+                        print(
+                            '\t', f'Dataset too large ({dataset_size_mb} MB); skipping conversion step.'
+                        )
+                        print('\t', 'Use `--include_large` to include conversions for large datasets.')
+
+
+def ingest_use_case(use_case_name, include_large=False, dataset_indexes=None):
+    print(f'Populating server with data for use case {use_case_name}...')
+    ingest_datasets(
+        use_case=use_case_name,
+        include_large=include_large,
+        dataset_indexes=dataset_indexes,
+    )
+    ingest_charts(use_case=use_case_name)
+    ingest_contexts(use_case=use_case_name)

diff --git a/sample_data/use_cases/boston_floods/ingest.py b/sample_data/use_cases/boston_floods/ingest.py
new file mode 100644
index 00000000..6d89aec0
--- /dev/null
+++ b/sample_data/use_cases/boston_floods/ingest.py
@@ -0,0 +1,9 @@
+
+def convert_dataset(dataset, options):
+    print('\t', f'Converting data for {dataset.name}...')
+    dataset.spawn_conversion_task(
+        style_options=options.get('style_options'),
+        network_options=options.get('network_options'),
+        region_options=options.get('region_options'),
+        asynchronous=False,
+    )

diff --git a/sample_data/use_cases/new_york_energy/charts.json b/sample_data/use_cases/new_york_energy/charts.json
deleted file mode 100644
index fe51488c..00000000
--- a/sample_data/use_cases/new_york_energy/charts.json
+++ /dev/null
@@ -1 +0,0 @@
-[]

diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json
index 741f0236..86fabfe2 100644
--- a/sample_data/use_cases/new_york_energy/datasets.json
+++ b/sample_data/use_cases/new_york_energy/datasets.json
@@ -49,8 +49,6 @@
                 "url": "https://data.kitware.com/api/v1/item/66a7bdab0ea2cce8e698b958/download",
                 "path": "nyc/networks.zip"
             }
-        ],
-        "module": "import_networks",
-        "function": "perform_import"
+        ]
     }
 ]

diff --git a/sample_data/use_cases/new_york_energy/ingest.py b/sample_data/use_cases/new_york_energy/ingest.py
new file mode 100644
index 00000000..9543bd99
--- /dev/null
+++ b/sample_data/use_cases/new_york_energy/ingest.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+from .import_networks import perform_import
+from .export_networks import perform_export
+from .nysdp import create_consolidated_network
+
+
+DOWNLOADS_FOLDER = Path('../../sample_data/downloads')
+PULL_LATEST = False
+
+
+def convert_dataset(dataset, options, ):
+    print('\t', f'Converting data for {dataset.name}...')
+    if dataset.name == 'National Grid County Networks':
+        if PULL_LATEST:
+            # pull latest data from NYSDP and run network interpretation algorithm
+            dataset.source_files.delete()
+            create_consolidated_network(dataset, downloads_folder=DOWNLOADS_FOLDER)
+            perform_export()
+        else:
+            perform_import(dataset, downloads_folder=DOWNLOADS_FOLDER)
+    else:
+        dataset.spawn_conversion_task(
+            style_options=options.get('style_options'),
+            network_options=options.get('network_options'),
+            region_options=options.get('region_options'),
+            asynchronous=False,
+        )

diff --git a/uvdat/core/management/commands/populate.py b/uvdat/core/management/commands/populate.py
index ff4f07cd..f3f48573 100644
--- a/uvdat/core/management/commands/populate.py
+++ b/uvdat/core/management/commands/populate.py
@@ -1,6 +1,6 @@
 from django.core.management.base import BaseCommand

-from sample_data.ingest_sample_data import ingest_charts, ingest_contexts, ingest_datasets
+from sample_data.ingest_use_case import ingest_use_case


 class Command(BaseCommand):
@@ -20,17 +20,14 @@ def add_arguments(self, parser):
         parser.add_argument('--dataset_indexes', nargs='*', type=int)

     def handle(self, *args, **kwargs):
-        use_case = kwargs['use_case']
+        use_case_name = kwargs['use_case']
         include_large = kwargs['include_large']
         dataset_indexes = kwargs['dataset_indexes']
         if dataset_indexes is None or len(dataset_indexes) == 0:
             dataset_indexes = None

-        print(f'Populating server with sample data for use case {use_case}...')
-        ingest_datasets(
-            use_case,
+        ingest_use_case(
+            use_case_name,
             include_large=include_large,
             dataset_indexes=dataset_indexes,
         )
-        ingest_contexts(use_case)
-        ingest_charts(use_case)
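After this refactor, a use case is populated through the single ingest_use_case entry point. Invoked from Python, for example in a test, it would look roughly like this, assuming the positional `use_case` argument is registered by add_arguments earlier in the series:

    from django.core.management import call_command

    # Small use case: every dataset is under the 50 MB conversion threshold.
    call_command('populate', 'boston_floods')
    # Larger use case: include large conversions, but only datasets 0 and 2.
    call_command('populate', 'new_york_energy', include_large=True, dataset_indexes=[0, 2])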
From df98b5d72ac16471c55127e58327d1b587089276 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Fri, 2 Aug 2024 19:01:26 +0000
Subject: [PATCH 14/19] fix: ingest contexts before charts

---
 sample_data/ingest_use_case.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample_data/ingest_use_case.py b/sample_data/ingest_use_case.py
index f8dc8b4c..8d4af224 100644
--- a/sample_data/ingest_use_case.py
+++ b/sample_data/ingest_use_case.py
@@ -162,5 +162,5 @@ def ingest_use_case(use_case_name, include_large=False, dataset_indexes=None):
         include_large=include_large,
         dataset_indexes=dataset_indexes,
     )
-    ingest_charts(use_case=use_case_name)
     ingest_contexts(use_case=use_case_name)
+    ingest_charts(use_case=use_case_name)

From 11c2c8315b49076709ab9bd46cdda90e549ab690 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Fri, 2 Aug 2024 19:11:41 +0000
Subject: [PATCH 15/19] test: adjust expected number of contexts in populate
 test

---
 uvdat/core/tests/test_populate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/uvdat/core/tests/test_populate.py b/uvdat/core/tests/test_populate.py
index 55d79bd0..22021ea1 100644
--- a/uvdat/core/tests/test_populate.py
+++ b/uvdat/core/tests/test_populate.py
@@ -35,7 +35,7 @@ def test_populate():
     )

     assert Chart.objects.all().count() == 1
-    assert Context.objects.all().count() == 3
+    assert Context.objects.all().count() == 2
     assert Dataset.objects.all().count() == 4
     assert DerivedRegion.objects.all().count() == 0
     assert FileItem.objects.all().count() == 7

From ee550201c20bd724d02a7d736a8807d88afb001b Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 5 Aug 2024 12:54:01 +0000
Subject: [PATCH 16/19] fix: remove other usages of module reference for nysdp
 datasets

---
 .../use_cases/new_york_energy/datasets.json     | 14 ++------------
 sample_data/use_cases/new_york_energy/ingest.py | 10 +++++++---
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json
index 86fabfe2..12061158 100644
--- a/sample_data/use_cases/new_york_energy/datasets.json
+++ b/sample_data/use_cases/new_york_energy/datasets.json
@@ -2,12 +2,7 @@
     {
         "name": "National Grid CompanyBoundary",
         "description": "CompanyBoundary vector data",
-        "category": "region",
-        "module": "nysdp",
-        "function": "create_vector_features",
-        "kwargs": {
-            "service_name": "CompanyBoundary"
-        }
+        "category": "region"
     },
     {
         "name": "County Boundaries",
@@ -28,12 +23,7 @@
     {
         "name": "National Grid Substations",
         "description": "Substations vector data",
-        "category": "energy",
-        "module": "nysdp",
-        "function": "create_vector_features",
-        "kwargs": {
-            "service_name": "Substations"
-        }
+        "category": "energy"
     },
     {
         "name": "National Grid County Networks",
diff --git a/sample_data/use_cases/new_york_energy/ingest.py b/sample_data/use_cases/new_york_energy/ingest.py
index 9543bd99..eb959f7c 100644
--- a/sample_data/use_cases/new_york_energy/ingest.py
+++ b/sample_data/use_cases/new_york_energy/ingest.py
@@ -1,23 +1,27 @@
 from pathlib import Path
 from .import_networks import perform_import
 from .export_networks import perform_export
-from .nysdp import create_consolidated_network
+from .nysdp import create_consolidated_network, create_vector_features


 DOWNLOADS_FOLDER = Path('../../sample_data/downloads')
 PULL_LATEST = False


-def convert_dataset(dataset, options, ):
+def convert_dataset(dataset, options):
     print('\t', f'Converting data for {dataset.name}...')
     if dataset.name == 'National Grid County Networks':
         if PULL_LATEST:
             # pull latest data from NYSDP and run network interpretation algorithm
-            dataset.source_files.delete()
+            dataset.source_files.all().delete()
             create_consolidated_network(dataset, downloads_folder=DOWNLOADS_FOLDER)
             perform_export()
         else:
             perform_import(dataset, downloads_folder=DOWNLOADS_FOLDER)
+    elif dataset.name == 'National Grid CompanyBoundary':
+        create_vector_features(dataset, 'CompanyBoundary')
+    elif dataset.name == 'National Grid Substations':
+        create_vector_features(dataset, 'Substations')
     else:
         dataset.spawn_conversion_task(
             style_options=options.get('style_options'),

From e2c754949ff2ca66e7b67bdca4c9fb55f4aadbcb Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 5 Aug 2024 12:57:36 +0000
Subject: [PATCH 17/19] fix: filter properties upon import (key and value must
 exist)

---
 .../use_cases/new_york_energy/import_networks.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py
index 50bc30e9..bcbc7b7b 100644
--- a/sample_data/use_cases/new_york_energy/import_networks.py
+++ b/sample_data/use_cases/new_york_energy/import_networks.py
@@ -15,6 +15,13 @@
 TOLERANCE_METERS = 15


+def get_metadata(feature):
+    return {
+        k: v for k, v in feature.get('properties', {}).items()
+        if k and v
+    }
+
+
 def create_network(dataset, network_name, geodata):
     print(f'\t\tCreating network for {network_name}.')
     network = Network.objects.create(
@@ -32,14 +39,14 @@ def create_network(dataset, network_name, geodata):
                 name=f'{network_name} {len(nodes)}',
                 network=network,
                 location=Point(*geom.get('coordinates')),
-                metadata=feature.get('properties'),
+                metadata=get_metadata(feature),
             ))
         elif geom.get('type') == 'LineString':
             edges.append(NetworkEdge(
                 name=f'{network_name} {len(edges)}',
                 network=network,
                 line_geometry=LineString(*geom.get('coordinates')),
-                metadata=feature.get('properties'),
+                metadata=get_metadata('feature'),
             ))
     NetworkNode.objects.bulk_create(nodes, batch_size=1000)

From 296dc7865d37dc0bbed1e0838e29a6df31793589 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Mon, 5 Aug 2024 13:15:52 +0000
Subject: [PATCH 18/19] fix: remove unintentional quotes

---
 sample_data/use_cases/new_york_energy/import_networks.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py
index bcbc7b7b..333a3971 100644
--- a/sample_data/use_cases/new_york_energy/import_networks.py
+++ b/sample_data/use_cases/new_york_energy/import_networks.py
@@ -46,7 +46,7 @@ def create_network(dataset, network_name, geodata):
                 name=f'{network_name} {len(edges)}',
                 network=network,
                 line_geometry=LineString(*geom.get('coordinates')),
-                metadata=get_metadata('feature'),
+                metadata=get_metadata(feature),
             ))
     NetworkNode.objects.bulk_create(nodes, batch_size=1000)
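get_metadata keeps only entries whose key and value are both truthy, which is what the commit message means by "key and value must exist". For example:

    feature = {'properties': {'county': 'Albany', 'notes': '', 'voltage': 0, None: 'x'}}
    get_metadata(feature)  # -> {'county': 'Albany'}

The empty string, the zero, and the None key are all dropped; if a falsy value such as 0 or False were ever meaningful metadata, a `v is not None` test would be needed instead.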
From a287c78f0ba11aa945792c39899a94955b66fa67 Mon Sep 17 00:00:00 2001
From: Anne Haley
Date: Wed, 21 Aug 2024 18:09:06 +0000
Subject: [PATCH 19/19] refactor: Rename `vector_features_from_network` ->
 `create_vector_features_from_network`

---
 sample_data/use_cases/new_york_energy/import_networks.py | 4 ++--
 sample_data/use_cases/new_york_energy/nysdp.py           | 4 ++--
 uvdat/core/tasks/networks.py                             | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py
index 333a3971..2743dcf2 100644
--- a/sample_data/use_cases/new_york_energy/import_networks.py
+++ b/sample_data/use_cases/new_york_energy/import_networks.py
@@ -9,7 +9,7 @@
 from django.contrib.gis.db.models.functions import Distance
 from django.contrib.gis.geos import Point, LineString
 from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorMapLayer
-from uvdat.core.tasks.networks import vector_features_from_network
+from uvdat.core.tasks.networks import create_vector_features_from_network


 TOLERANCE_METERS = 15
@@ -74,7 +74,7 @@ def create_network(dataset, network_name, geodata):
     not_connected.delete()

     NetworkEdge.objects.bulk_create(edges, batch_size=1000)
-    vector_features_from_network(network)
+    create_vector_features_from_network(network)
     print(f'\t\tCreated {network.nodes.count()} nodes and {network.edges.count()} edges.')

diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py
index 7fb7217a..de763420 100644
--- a/sample_data/use_cases/new_york_energy/nysdp.py
+++ b/sample_data/use_cases/new_york_energy/nysdp.py
@@ -8,7 +8,7 @@
 from django.contrib.gis.geos import GEOSGeometry, Point, LineString

 from uvdat.core.models import Dataset, Network, NetworkNode, NetworkEdge, VectorMapLayer, VectorFeature, SourceRegion
-from uvdat.core.tasks.networks import vector_features_from_network
+from uvdat.core.tasks.networks import create_vector_features_from_network

 from .interpret_network import interpret_group
@@ -173,7 +173,7 @@ def create_consolidated_network(dataset, **kwargs):
     ], batch_size=1000)
     print(f'\t\t{network.nodes.count()} nodes created, {network.edges.count()} edges created.')
-    vector_features_from_network(network)
+    create_vector_features_from_network(network)

     print(f'\t\t{dataset.networks.count()} separate networks created.')
     print(f'\tCompleted in {(datetime.now() - start).total_seconds()} seconds.')

diff --git a/uvdat/core/tasks/networks.py b/uvdat/core/tasks/networks.py
index 325e8958..ddf78561 100644
--- a/uvdat/core/tasks/networks.py
+++ b/uvdat/core/tasks/networks.py
@@ -213,8 +213,8 @@ def geojson_from_network(dataset):
     return new_geodata.to_json()


-def vector_features_from_network(network):
-    map_layer, created = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0)
+def create_vector_features_from_network(network):
+    map_layer, _ = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0)
     VectorFeature.objects.bulk_create(
         [
             VectorFeature(