diff --git a/.gitignore b/.gitignore index 0c317854..f086f1ab 100644 --- a/.gitignore +++ b/.gitignore @@ -4,7 +4,7 @@ minio/* rabbitmq/* .vscode/ staticfiles/ -sample_data/*/* +sample_data/downloads/* # osmnx data cache folder cache diff --git a/sample_data/cities.json b/sample_data/cities.json deleted file mode 100644 index b00ea6b4..00000000 --- a/sample_data/cities.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "name": "Boston, MA", - "latitude": 42.4, - "longitude": -71.1, - "default_zoom": 11 - }, - { - "name": "Washington, DC", - "latitude": 38.9, - "longitude": -77.1, - "default_zoom": 11 - } -] diff --git a/sample_data/ingest_sample_data.py b/sample_data/ingest_sample_data.py deleted file mode 100644 index 064b4170..00000000 --- a/sample_data/ingest_sample_data.py +++ /dev/null @@ -1,144 +0,0 @@ -from datetime import datetime -import json -import os -from pathlib import Path - -from django.contrib.gis.geos import Point -from django.core.files.base import ContentFile -import requests - -from uvdat.core.models import Chart, Context, Dataset, FileItem - - -def ingest_file(file_info, index=0, dataset=None, chart=None): - file_path = file_info.get('path') - file_name = file_info.get('name', file_path.split('/')[-1]) - file_url = file_info.get('url') - file_metadata = file_info.get('metadata', {}) - - file_location = Path('sample_data', file_path) - file_type = file_path.split('.')[-1] - if not file_location.exists(): - print(f'\t Downloading data file {file_name}.') - file_location.parent.mkdir(parents=True, exist_ok=True) - with open(file_location, 'wb') as f: - r = requests.get(file_url) - f.write(r.content) - - existing = FileItem.objects.filter(name=file_name) - if existing.count(): - print('\t', f'FileItem {file_name} already exists.') - else: - new_file_item = FileItem.objects.create( - name=file_name, - dataset=dataset, - chart=chart, - file_type=file_type, - file_size=os.path.getsize(file_location), - metadata=dict( - **file_metadata, - uploaded=str(datetime.now()), - ), - index=index, - ) - print('\t', f'FileItem {new_file_item.name} created.') - with file_location.open('rb') as f: - new_file_item.file.save(file_path, ContentFile(f.read())) - - -def ingest_contexts(): - print('Creating Context objects...') - with open('sample_data/contexts.json') as contexts_json: - data = json.load(contexts_json) - for context in data: - print('\t- ', context['name']) - existing = Context.objects.filter(name=context['name']) - if existing.count(): - context_for_setting = existing.first() - else: - context_for_setting = Context.objects.create( - name=context['name'], - default_map_center=Point(*context['default_map_center']), - default_map_zoom=context['default_map_zoom'], - ) - print('\t', f'Context {context_for_setting.name} created.') - - context_for_setting.datasets.set(Dataset.objects.filter(name__in=context['datasets'])) - - -def ingest_charts(): - print('Creating Chart objects...') - with open('sample_data/charts.json') as charts_json: - data = json.load(charts_json) - for chart in data: - print('\t- ', chart['name']) - existing = Chart.objects.filter(name=chart['name']) - if existing.count(): - chart_for_conversion = existing.first() - else: - new_chart = Chart.objects.create( - name=chart['name'], - description=chart['description'], - context=Context.objects.get(name=chart['context']), - chart_options=chart.get('chart_options'), - metadata=chart.get('metadata'), - editable=chart.get('editable', False), - ) - print('\t', f'Chart {new_chart.name} created.') - for index, file_info in 
enumerate(chart.get('files', [])): - ingest_file( - file_info, - index=index, - chart=new_chart, - ) - chart_for_conversion = new_chart - - print('\t', f'Converting data for {chart_for_conversion.name}...') - chart_for_conversion.spawn_conversion_task( - conversion_options=chart.get('conversion_options'), - asynchronous=False, - ) - - -def ingest_datasets(include_large=False, dataset_indexes=None): - print('Creating Dataset objects...') - with open('sample_data/datasets.json') as datasets_json: - data = json.load(datasets_json) - for index, dataset in enumerate(data): - if dataset_indexes is None or index in dataset_indexes: - print('\t- ', dataset['name']) - existing = Dataset.objects.filter(name=dataset['name']) - if existing.count(): - dataset_for_conversion = existing.first() - else: - # Create dataset - new_dataset = Dataset.objects.create( - name=dataset['name'], - description=dataset['description'], - category=dataset['category'], - dataset_type=dataset.get('type', 'vector').upper(), - metadata=dataset.get('metadata', {}), - ) - print('\t', f'Dataset {new_dataset.name} created.') - for index, file_info in enumerate(dataset.get('files', [])): - ingest_file( - file_info, - index=index, - dataset=new_dataset, - ) - dataset_for_conversion = new_dataset - - dataset_size_mb = dataset_for_conversion.get_size() >> 20 - if include_large or dataset_size_mb < 50: - print('\t', f'Converting data for {dataset_for_conversion.name}...') - dataset_for_conversion.spawn_conversion_task( - style_options=dataset.get('style_options'), - network_options=dataset.get('network_options'), - region_options=dataset.get('region_options'), - asynchronous=False, - ) - else: - print( - '\t', f'Dataset too large ({dataset_size_mb} MB); skipping conversion step.' - ) - print('\t', 'Use `--include_large` to include conversions for large datasets.') diff --git a/sample_data/ingest_sample_data_output.txt b/sample_data/ingest_sample_data_output.txt deleted file mode 100644 index e510a6d5..00000000 --- a/sample_data/ingest_sample_data_output.txt +++ /dev/null @@ -1,112 +0,0 @@ -root@70f88cf67dbf:/opt/django-project# python manage.py populate --include_large ------------------------------------- -Populating server with sample data... -Creating Context objects... - - Boston Transportation - Context Boston Transportation created. - - DC Transportation - Context DC Transportation created. - - Boston-Washington Transportation - Context Boston-Washington Transportation created. -Creating Chart objects... - - Boston Harbor Daily Tide Levels - Chart Boston Harbor Daily Tide Levels created. - FileItem tide_level_data.csv created. - Converting data for Boston Harbor Daily Tide Levels... - Saved converted data for chart Boston Harbor Daily Tide Levels. -Creating Dataset objects... - - MBTA Rapid Transit - Dataset MBTA Rapid Transit created. - FileItem mbta_rapid_transit.zip created. - Converting data for MBTA Rapid Transit... - VectorMapLayer 1 created. - 31 vector tiles created. - 158 nodes and 164 edges created. - - MBTA Commuter Rail - Dataset MBTA Commuter Rail created. - FileItem commuter_rail.zip created. - Converting data for MBTA Commuter Rail... - VectorMapLayer 2 created. - 686 vector tiles created. - 268 nodes and 269 edges created. - - Boston Hurricane Surge Inundation Zones - Dataset Boston Hurricane Surge Inundation Zones created. - FileItem hurr_inun.zip created. - Converting data for Boston Hurricane Surge Inundation Zones... - VectorMapLayer 3 created. - 295 vector tiles created. 
- - Boston FEMA National Flood Hazard - Dataset Boston FEMA National Flood Hazard created. - FileItem flood_hazard_fema.zip created. - Converting data for Boston FEMA National Flood Hazard... - VectorMapLayer 4 created. - 587 vector tiles created. - - Massachusetts Elevation Data - Dataset Massachusetts Elevation Data created. - FileItem easternmass.tif created. - Converting data for Massachusetts Elevation Data... - RasterMapLayer 1 created. - - Boston Neighborhoods - Dataset Boston Neighborhoods created. - FileItem neighborhoods2020.json created. - Converting data for Boston Neighborhoods... - VectorMapLayer 5 created. - 26 vector tiles created. - 24 regions created. - - Boston Census 2020 Block Groups - Dataset Boston Census 2020 Block Groups created. - FileItem blockgroups.zip created. - Converting data for Boston Census 2020 Block Groups... - VectorMapLayer 6 created. - 26 vector tiles created. - 581 regions created. - - Boston Zip Codes - Dataset Boston Zip Codes created. - FileItem zipcodes.zip created. - Converting data for Boston Zip Codes... - VectorMapLayer 7 created. - 780 vector tiles created. - 539 regions created. - - Boston Sea Level Rises - Dataset Boston Sea Level Rises created. - FileItem 9in_rise.geojson created. - FileItem 21in_rise.geojson created. - FileItem 36in_rise.geojson created. - Converting data for Boston Sea Level Rises... - VectorMapLayer 8 created. - 26 vector tiles created. - VectorMapLayer 9 created. - 26 vector tiles created. - VectorMapLayer 10 created. - 26 vector tiles created. - - Boston 10-Year Flood Events - Dataset Boston 10-Year Flood Events created. - FileItem 9in_10yr_flood.geojson created. - FileItem 21in_10yr_flood.geojson created. - FileItem 36in_10yr_flood.geojson created. - Converting data for Boston 10-Year Flood Events... - VectorMapLayer 11 created. - 26 vector tiles created. - VectorMapLayer 12 created. - 26 vector tiles created. - VectorMapLayer 13 created. - 26 vector tiles created. - - Boston 100-Year Flood Events - Dataset Boston 100-Year Flood Events created. - FileItem 9in_100yr_flood.geojson created. - FileItem 21in_100yr_flood.geojson created. - FileItem 36in_100yr_flood.geojson created. - Converting data for Boston 100-Year Flood Events... - VectorMapLayer 14 created. - 26 vector tiles created. - VectorMapLayer 15 created. - 26 vector tiles created. - VectorMapLayer 16 created. - 26 vector tiles created. - - DC Metro - Dataset DC Metro created. - FileItem DC_Metro.zip created. - Converting data for DC Metro... - VectorMapLayer 17 created. - 56 vector tiles created. - 98 nodes and 134 edges created. 
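# The hunks below replace the deleted ingest_sample_data.py with a single per-use-case entry point, sample_data/ingest_use_case.py. A minimal usage sketch (illustrative only; it assumes a configured Django environment in which the sample_data package is importable), mirroring what the reworked `populate` management command does later in this diff:
from sample_data.ingest_use_case import ingest_use_case
# Roughly equivalent to `manage.py populate boston_floods --include_large`
ingest_use_case('boston_floods', include_large=True, dataset_indexes=None)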
diff --git a/sample_data/ingest_use_case.py b/sample_data/ingest_use_case.py new file mode 100644 index 00000000..8d4af224 --- /dev/null +++ b/sample_data/ingest_use_case.py @@ -0,0 +1,166 @@ +from datetime import datetime +import importlib +import json +import os +from pathlib import Path + +from django.contrib.gis.geos import Point +from django.core.files.base import ContentFile +import requests + +from uvdat.core.models import Chart, Context, Dataset, FileItem + +from .use_cases.boston_floods import ingest as boston_floods_ingest +from .use_cases.new_york_energy import ingest as new_york_energy_ingest + + +USE_CASE_FOLDER = Path('sample_data/use_cases') +DOWNLOADS_FOLDER = Path('sample_data/downloads') + + +def ingest_file(file_info, index=0, dataset=None, chart=None): + file_path = file_info.get('path') + file_name = file_info.get('name', file_path.split('/')[-1]) + file_url = file_info.get('url') + file_metadata = file_info.get('metadata', {}) + + file_location = Path(DOWNLOADS_FOLDER, file_path) + file_type = file_path.split('.')[-1] + if not file_location.exists(): + print(f'\t Downloading data file {file_name}.') + file_location.parent.mkdir(parents=True, exist_ok=True) + with open(file_location, 'wb') as f: + r = requests.get(file_url) + f.write(r.content) + + existing = FileItem.objects.filter(name=file_name) + if existing.count(): + print('\t', f'FileItem {file_name} already exists.') + else: + new_file_item = FileItem.objects.create( + name=file_name, + dataset=dataset, + chart=chart, + file_type=file_type, + file_size=os.path.getsize(file_location), + metadata=dict( + **file_metadata, + uploaded=str(datetime.now()), + ), + index=index, + ) + print('\t', f'FileItem {new_file_item.name} created.') + with file_location.open('rb') as f: + new_file_item.file.save(file_path, ContentFile(f.read())) + + +def ingest_contexts(use_case): + context_file_path = USE_CASE_FOLDER / use_case / 'contexts.json' + if context_file_path.exists(): + print('Creating Context objects...') + with open(context_file_path) as contexts_json: + data = json.load(contexts_json) + for context in data: + print('\t- ', context['name']) + existing = Context.objects.filter(name=context['name']) + if existing.count(): + context_for_setting = existing.first() + else: + context_for_setting = Context.objects.create( + name=context['name'], + default_map_center=Point(*context['default_map_center']), + default_map_zoom=context['default_map_zoom'], + ) + print('\t', f'Context {context_for_setting.name} created.') + + context_for_setting.datasets.set(Dataset.objects.filter(name__in=context['datasets'])) + + +def ingest_charts(use_case): + chart_file_path = USE_CASE_FOLDER / use_case / 'charts.json' + if chart_file_path.exists(): + print('Creating Chart objects...') + with open(chart_file_path) as charts_json: + data = json.load(charts_json) + for chart in data: + print('\t- ', chart['name']) + existing = Chart.objects.filter(name=chart['name']) + if existing.count(): + chart_for_conversion = existing.first() + else: + new_chart = Chart.objects.create( + name=chart['name'], + description=chart['description'], + context=Context.objects.get(name=chart['context']), + chart_options=chart.get('chart_options'), + metadata=chart.get('metadata'), + editable=chart.get('editable', False), + ) + print('\t', f'Chart {new_chart.name} created.') + for index, file_info in enumerate(chart.get('files', [])): + ingest_file( + file_info, + index=index, + chart=new_chart, + ) + chart_for_conversion = new_chart + + print('\t', f'Converting data 
for {chart_for_conversion.name}...') + chart_for_conversion.spawn_conversion_task( + conversion_options=chart.get('conversion_options'), + asynchronous=False, + ) + + +def ingest_datasets(use_case, include_large=False, dataset_indexes=None): + dataset_file_path = USE_CASE_FOLDER / use_case / 'datasets.json' + if dataset_file_path.exists(): + print('Creating Dataset objects...') + with open(dataset_file_path) as datasets_json: + data = json.load(datasets_json) + for index, dataset in enumerate(data): + if dataset_indexes is None or index in dataset_indexes: + existing = Dataset.objects.filter(name=dataset['name']) + if existing.count(): + dataset_for_conversion = existing.first() + else: + # Create dataset + new_dataset = Dataset.objects.create( + name=dataset['name'], + description=dataset['description'], + category=dataset['category'], + dataset_type=dataset.get('type', 'vector').upper(), + metadata=dataset.get('metadata', {}), + ) + print('\t', f'Dataset {new_dataset.name} created.') + for index, file_info in enumerate(dataset.get('files', [])): + ingest_file( + file_info, + index=index, + dataset=new_dataset, + ) + dataset_for_conversion = new_dataset + + dataset_size_mb = dataset_for_conversion.get_size() >> 20 + if include_large or dataset_size_mb < 50: + if use_case == 'boston_floods': + boston_floods_ingest.convert_dataset(dataset_for_conversion, dataset) + elif use_case == 'new_york_energy': + new_york_energy_ingest.convert_dataset(dataset_for_conversion, dataset) + else: + print( + '\t', f'Dataset too large ({dataset_size_mb} MB); skipping conversion step.' + ) + print('\t', 'Use `--include_large` to include conversions for large datasets.') + + + +def ingest_use_case(use_case_name, include_large=False, dataset_indexes=None): + print(f'Populating server with data for use case {use_case_name}...') + ingest_datasets( + use_case=use_case_name, + include_large=include_large, + dataset_indexes=dataset_indexes, + ) + ingest_contexts(use_case=use_case_name) + ingest_charts(use_case=use_case_name) diff --git a/sample_data/charts.json b/sample_data/use_cases/boston_floods/charts.json similarity index 100% rename from sample_data/charts.json rename to sample_data/use_cases/boston_floods/charts.json diff --git a/sample_data/contexts.json b/sample_data/use_cases/boston_floods/contexts.json similarity index 55% rename from sample_data/contexts.json rename to sample_data/use_cases/boston_floods/contexts.json index 0f6428b2..409a7911 100644 --- a/sample_data/contexts.json +++ b/sample_data/use_cases/boston_floods/contexts.json @@ -30,27 +30,5 @@ "datasets": [ "DC Metro" ] - }, - { - "name": "Boston-Washington Transportation", - "default_map_center": [ - 40.5, - -74.5 - ], - "default_map_zoom": 8, - "datasets": [ - "MBTA Rapid Transit", - "MBTA Commuter Rail", - "Massachusetts Elevation Data", - "Boston Hurricane Surge Inundation Zones", - "Bsoton FEMA National Flood Hazard", - "Boston Neighborhoods", - "Boston Census 2020 Block Groups", - "Boston Zip Codes", - "Boston Sea Level Rises", - "Boston 10-Year Flood Events", - "Boston 100-Year Flood Events", - "DC Metro" - ] } ] diff --git a/sample_data/datasets.json b/sample_data/use_cases/boston_floods/datasets.json similarity index 100% rename from sample_data/datasets.json rename to sample_data/use_cases/boston_floods/datasets.json diff --git a/sample_data/use_cases/boston_floods/ingest.py b/sample_data/use_cases/boston_floods/ingest.py new file mode 100644 index 00000000..6d89aec0 --- /dev/null +++ b/sample_data/use_cases/boston_floods/ingest.py 
@@ -0,0 +1,9 @@ + +def convert_dataset(dataset, options): + print('\t', f'Converting data for {dataset.name}...') + dataset.spawn_conversion_task( + style_options=options.get('style_options'), + network_options=options.get('network_options'), + region_options=options.get('region_options'), + asynchronous=False, + ) diff --git a/sample_data/use_cases/new_york_energy/contexts.json b/sample_data/use_cases/new_york_energy/contexts.json new file mode 100644 index 00000000..d58f67e5 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/contexts.json @@ -0,0 +1,16 @@ +[ + { + "name": "NY Energy", + "default_map_center": [ + 43.5, + -75.5 + ], + "default_map_zoom": 8, + "datasets": [ + "National Grid CompanyBoundary", + "National Grid County Networks", + "National Grid Substations", + "County Boundaries" + ] + } +] diff --git a/sample_data/use_cases/new_york_energy/datasets.json b/sample_data/use_cases/new_york_energy/datasets.json new file mode 100644 index 00000000..12061158 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/datasets.json @@ -0,0 +1,44 @@ +[ + { + "name": "National Grid CompanyBoundary", + "description": "CompanyBoundary vector data", + "category": "region" + }, + { + "name": "County Boundaries", + "description": "From https://gis.ny.gov/civil-boundaries", + "category": "region", + "type": "vector", + "files": [ + { + "url": "https://data.kitware.com/api/v1/item/66a2a19a5d2551c516b1e502/download", + "path": "nyc/counties.zip" + } + ], + "region_options": { + "name_property": "NAME" + }, + "style_options": { + "outline": "white", + "palette": [ + "grey" + ] + } + }, + { + "name": "National Grid Substations", + "description": "Substations vector data", + "category": "energy" + }, + { + "name": "National Grid County Networks", + "description": "Energy networks for each county in New York", + "category": "energy", + "files": [ + { + "url": "https://data.kitware.com/api/v1/item/66a7bdab0ea2cce8e698b958/download", + "path": "nyc/networks.zip" + } + ] + } +] diff --git a/sample_data/use_cases/new_york_energy/export_networks.py b/sample_data/use_cases/new_york_energy/export_networks.py new file mode 100644 index 00000000..83918540 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/export_networks.py @@ -0,0 +1,55 @@ +import json +import shapely + +from datetime import datetime +from pathlib import Path + +from uvdat.core.models import Network, SourceRegion + + +OUTPUT_FOLDER = Path('sample_data/use_cases/new_york_energy/networks') + + +def perform_export(): + start = datetime.now() + networks = Network.objects.filter(dataset__name='National Grid Network') + zones = SourceRegion.objects.filter(dataset__name='County Boundaries') + + for network in networks: + sample_node = network.nodes.first() + zone = zones.filter(boundary__contains=sample_node.location).first() + if zone: + features = [] + for node in network.nodes.all(): + geom = shapely.geometry.mapping(shapely.wkt.loads(node.location.wkt)) + features.append(dict( + geometry=geom, + properties=dict( + **node.metadata, + county=zone.name + ) + )) + for edge in network.edges.all(): + geom = shapely.geometry.mapping(shapely.wkt.loads(edge.line_geometry.wkt)) + from_point = shapely.geometry.mapping(shapely.wkt.loads(edge.from_node.location.wkt)) + to_point = shapely.geometry.mapping(shapely.wkt.loads(edge.to_node.location.wkt)) + features.append(dict( + geometry=geom, + properties=dict( + **edge.metadata, + from_point=from_point, + to_point=to_point, + county=zone.name + ) + )) + geodata = dict( + 
type="FeatureCollection", + crs=dict(type='name', properties=dict(name='EPSG:4326')), + features=features, + ) + filename = OUTPUT_FOLDER / f'{zone.name}.json' + with open(filename, 'w') as f: + json.dump(geodata, f) + print(f'Wrote {len(features)} features to {filename}.') + + print(f'\tCompleted in {(datetime.now() - start).total_seconds()} seconds.') diff --git a/sample_data/use_cases/new_york_energy/import_networks.py b/sample_data/use_cases/new_york_energy/import_networks.py new file mode 100644 index 00000000..2743dcf2 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/import_networks.py @@ -0,0 +1,99 @@ +import json +import tempfile +import zipfile + +from datetime import datetime +from pathlib import Path + +from django.contrib.gis.measure import D +from django.contrib.gis.db.models.functions import Distance +from django.contrib.gis.geos import Point, LineString +from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorMapLayer +from uvdat.core.tasks.networks import create_vector_features_from_network + + +TOLERANCE_METERS = 15 + + +def get_metadata(feature): + return { + k: v for k, v in feature.get('properties', {}).items() + if k and v + } + + +def create_network(dataset, network_name, geodata): + print(f'\t\tCreating network for {network_name}.') + network = Network.objects.create( + dataset=dataset, + category='energy', + metadata=dict(name=network_name) + ) + features = geodata.get('features') + nodes = [] + edges = [] + for feature in features: + geom = feature.get('geometry') + if geom.get('type') == 'Point': + nodes.append(NetworkNode( + name=f'{network_name} {len(nodes)}', + network=network, + location=Point(*geom.get('coordinates')), + metadata=get_metadata(feature), + )) + elif geom.get('type') == 'LineString': + edges.append(NetworkEdge( + name=f'{network_name} {len(edges)}', + network=network, + line_geometry=LineString(*geom.get('coordinates')), + metadata=get_metadata(feature), + )) + NetworkNode.objects.bulk_create(nodes, batch_size=1000) + + # fill in node relationships on edges now that nodes exist + connected_node_ids = [] + for edge in edges: + metadata = edge.metadata + from_point = metadata.get('from_point', {}).get('coordinates') + to_point = metadata.get('to_point', {}).get('coordinates') + if from_point is not None and to_point is not None: + from_point = Point(*from_point) + to_point = Point(*to_point) + from_nodes = NetworkNode.objects.filter(network=network, location=from_point) + to_nodes = NetworkNode.objects.filter(network=network, location=to_point) + if from_nodes.count() > 0 and to_nodes.count() > 0: + edge.from_node = from_nodes.first() + edge.to_node = to_nodes.first() + if edge.from_node.id not in connected_node_ids: + connected_node_ids.append(edge.from_node.id) + if edge.to_node.id not in connected_node_ids: + connected_node_ids.append(edge.to_node.id) + + # remove any nodes that have no connections + not_connected = NetworkNode.objects.filter(network=network).exclude(id__in=connected_node_ids) + not_connected.delete() + + NetworkEdge.objects.bulk_create(edges, batch_size=1000) + create_vector_features_from_network(network) + print(f'\t\tCreated {network.nodes.count()} nodes and {network.edges.count()} edges.') + + +def perform_import(dataset, **kwargs): + print('\tEstimated time: 90 minutes.') + start = datetime.now() + Network.objects.filter(dataset=dataset).delete() + VectorMapLayer.objects.filter(dataset=dataset).delete() + for file_item in dataset.source_files.all(): + with tempfile.TemporaryDirectory() as temp_dir: 
+ archive_path = Path(temp_dir, 'archive.zip') + with open(archive_path, 'wb') as archive_file: + archive_file.write(file_item.file.open('rb').read()) + with zipfile.ZipFile(archive_path) as zip_archive: + filenames = zip_archive.namelist() + for filename in filenames: + if filename.endswith('.json'): + network_name = filename.split('/')[-1].replace('.json', '') + content = zip_archive.open(filename).read() + geodata = json.loads(content) + create_network(dataset, network_name, geodata) + print(f'\tCompleted in {(datetime.now() - start).total_seconds()} seconds.') diff --git a/sample_data/use_cases/new_york_energy/ingest.py b/sample_data/use_cases/new_york_energy/ingest.py new file mode 100644 index 00000000..eb959f7c --- /dev/null +++ b/sample_data/use_cases/new_york_energy/ingest.py @@ -0,0 +1,31 @@ +from pathlib import Path +from .import_networks import perform_import +from .export_networks import perform_export +from .nysdp import create_consolidated_network, create_vector_features + + +DOWNLOADS_FOLDER = Path('../../sample_data/downloads') +PULL_LATEST = False + + +def convert_dataset(dataset, options): + print('\t', f'Converting data for {dataset.name}...') + if dataset.name == 'National Grid County Networks': + if PULL_LATEST: + # pull latest data from NYSDP and run network interpretation algorithm + dataset.source_files.all().delete() + create_consolidated_network(dataset, downloads_folder=DOWNLOADS_FOLDER, zones_dataset_name='County Boundaries') + perform_export() + else: + perform_import(dataset, downloads_folder=DOWNLOADS_FOLDER) + elif dataset.name == 'National Grid CompanyBoundary': + create_vector_features(dataset, 'CompanyBoundary') + elif dataset.name == 'National Grid Substations': + create_vector_features(dataset, 'Substations') + else: + dataset.spawn_conversion_task( + style_options=options.get('style_options'), + network_options=options.get('network_options'), + region_options=options.get('region_options'), + asynchronous=False, + ) diff --git a/sample_data/use_cases/new_york_energy/interpret_network.py b/sample_data/use_cases/new_york_energy/interpret_network.py new file mode 100644 index 00000000..49e75ca9 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/interpret_network.py @@ -0,0 +1,198 @@ +import json +import geopandas +import shapely + +from webcolors import name_to_hex + + +TOLERANCE = 0.0001 + + +def get_properties(feature): + properties = json.loads( + feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json() + ) + properties.update(dict( + colors=','.join([ + name_to_hex((properties.get('color') or 'black').replace(' ', '')), + '#ffffff' + ]) + )) + return properties + + +def merge_properties(p1, p2): + properties = {} + if p1 is None: + return p2 + if p2 is None: + return p1 + for k1, v1 in p1.items(): + v2 = p2.get(k1) + if v2 is None or v2 == '': + properties[k1] = v1 + else: + if v1 is None or v1 == '': + properties[k1] = v2 + else: + properties[k1] = ','.join([str(v1), str(v2)]) + # update p2 with merged properties to catch keys not in p1 + p2.update(properties) + return p2 + + +def cut_crossed_lines(gdf): + # cut lines at any cross points + separated_features = [] + for feat_index, feature in gdf.iterrows(): + properties = json.loads( + feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json() + ) + curr_geom = feature.geometry + crosses = gdf[gdf.geometry.crosses(curr_geom)] + separated = [] + if len(crosses) > 0: + split_points = [] + for c_id, cross in crosses.iterrows(): + p = cross.geometry.intersection(curr_geom) + if p.geom_type
== 'MultiPoint': + split_points.append(p.geoms[0]) + elif p.geom_type == 'Point': + split_points.append(p) + separated = shapely.ops.split(curr_geom, shapely.MultiPoint(split_points)).geoms + else: + separated = [feature.geometry] + separated_features.extend([ + dict( + type='Feature', + geometry=json.loads(shapely.to_geojson(s)), + properties=properties + ) + for s in separated + ]) + + gdf = geopandas.GeoDataFrame.from_features(separated_features) + return gdf + + +def merge_lines(gdf): + visited = [] + merged_features = [] + for feat_index, feature in gdf.iterrows(): + if feat_index not in visited: + visited.append(feat_index) + properties = json.loads( + feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json() + ) + curr_geom = feature.geometry + not_visited = gdf[~gdf.index.isin(visited)] + touching = not_visited[not_visited.geometry.touches(curr_geom)] + snapped = touching.snap(curr_geom, TOLERANCE * 2) + merge = shapely.line_merge(shapely.union_all([ + *snapped.geometry, curr_geom + ])) + curr_segment = None + if merge.geom_type == 'MultiLineString': + for segment in merge.geoms: + if segment.contains(curr_geom) and not any( + s.touches(segment) + for s in merge.geoms + if s != segment + ): + curr_segment = segment + elif merge.geom_type == 'LineString': + curr_segment = merge + + if curr_segment is None: + # no valid merge segment, include feature as-is + merged_features.append(dict( + type='Feature', + geometry=json.loads(shapely.to_geojson(curr_geom)), + properties=properties + )) + else: + # valid merge segment, mark constituent features as visited + visited_indexes = list(snapped[snapped.geometry.within(curr_segment)].index) + visited += visited_indexes + visited_features = gdf.loc[visited_indexes] + for v_index, v_feature in visited_features.iterrows(): + properties = merge_properties(properties, json.loads( + v_feature.drop(['geometry', 'index'], errors='ignore').fillna('').to_json() + )) + merged_features.append(dict( + type='Feature', + geometry=json.loads(shapely.to_geojson(curr_segment)), + properties=properties + )) + gdf = geopandas.GeoDataFrame.from_features(merged_features) + return cut_crossed_lines(gdf) + + +def find_nodes(gdf): + nodes = [] + + for feat_index, feature in gdf.iterrows(): + properties = get_properties(feature) + curr_geom = feature.geometry + points = [shapely.Point(*p) for p in curr_geom.coords] + # create nodes at line endpoints + for endpoint in [points[0], points[-1]]: + # touching_lines = gdf[gdf.geometry.snap(endpoint, TOLERANCE).touches(endpoint)] + existing_node_locations = geopandas.GeoSeries([n['location'] for n in nodes]) + if ( + # allow endpoints (1 touching) and intersections (>2 touching) + # (len(touching_lines) == 1 or len(touching_lines) > 2 ) and + # omit duplicates within tolerance radius + not existing_node_locations.dwithin(endpoint, TOLERANCE).any() + ): + nodes.append(dict( + location=endpoint, + metadata=properties + )) + return nodes + + +def find_edges(gdf, nodes): + edges = [] + existing_node_locations = geopandas.GeoSeries([n['location'] for n in nodes]) + for feat_index, feature in gdf.iterrows(): + properties = get_properties(feature) + curr_geom = feature.geometry + points = [shapely.Point(*p) for p in curr_geom.coords] + nearby_nodes = existing_node_locations[existing_node_locations.dwithin(curr_geom, TOLERANCE)] + snapped = shapely.snap(shapely.MultiPoint(list(nearby_nodes.geometry)), curr_geom, TOLERANCE) + separated = shapely.ops.split(shapely.LineString(points), snapped).geoms + 
existing_edge_geometries = geopandas.GeoSeries([e['line_geometry'] for e in edges]) + for segment in separated: + endpoints = [ + shapely.Point(segment.coords[0]), + shapely.Point(segment.coords[-1]), + ] + from_points = nearby_nodes[nearby_nodes.dwithin(endpoints[0], TOLERANCE)] + to_points = nearby_nodes[nearby_nodes.dwithin(endpoints[1], TOLERANCE)] + if ( + len(from_points) > 0 and + len(to_points) > 0 and + not shapely.snap(existing_edge_geometries, segment, TOLERANCE).covers(segment).any() + ): + edges.append(dict( + line_geometry=segment, + from_point=from_points.iloc[0], + to_point=to_points.iloc[0], + metadata=properties, + )) + return edges + + +def interpret_group(gdf): + print(f'\t\t Reading group with {len(gdf)} features.') + # iteratively merge lines until no more merging can be done + merged_gdf = merge_lines(gdf) + while len(merged_gdf) < len(gdf): + gdf = merged_gdf + merged_gdf = merge_lines(gdf) + print(f'\t\tMerged to {len(gdf)} lines.') + + nodes = find_nodes(gdf) + edges = find_edges(gdf, nodes) + + return nodes, edges diff --git a/sample_data/use_cases/new_york_energy/nysdp.py b/sample_data/use_cases/new_york_energy/nysdp.py new file mode 100644 index 00000000..de763420 --- /dev/null +++ b/sample_data/use_cases/new_york_energy/nysdp.py @@ -0,0 +1,179 @@ +import json +import geopandas +import requests + +from datetime import datetime +from pathlib import Path +from concurrent.futures import ThreadPoolExecutor + +from django.contrib.gis.geos import GEOSGeometry, Point, LineString +from uvdat.core.models import Dataset, Network, NetworkNode, NetworkEdge, VectorMapLayer, VectorFeature, SourceRegion +from uvdat.core.tasks.networks import create_vector_features_from_network + +from .interpret_network import interpret_group + +DOWNLOAD_PATH = Path(__file__).parent.parent +NYDSP_URL = 'https://systemdataportal.nationalgrid.com/arcgis/rest/services/NYSDP' +RECORDS_PER_PAGE = 1000 +SERVICE_SUFFIX = 'MapServer' +FORMAT_SUFFIX = 'f=pjson' +QUERY_CONTENT = f'where=1%3D1&returnGeometry=true&outFields=*&resultRecordCount={RECORDS_PER_PAGE}&f=geojson' + + +def fetch_vector_features(service_name=None, **kwargs): + feature_sets = {} + if service_name is None: + return feature_sets + service_url = f'{NYDSP_URL}/{service_name}/{SERVICE_SUFFIX}' + service_info = requests.get(f'{service_url}?{FORMAT_SUFFIX}').json() + for layer in service_info.get('layers', []): + feature_set = [] + layer_id = layer.get('id') + if layer_id is not None: + feature_page = None + result_offset = 0 + while feature_page is None or len(feature_page) == RECORDS_PER_PAGE: + query_response = requests.get( + f"{service_url}/{layer_id}/query?resultOffset={result_offset}&{QUERY_CONTENT}" + ) + try: + query_json = query_response.json() + feature_page = query_json.get('features', []) + feature_set += feature_page + result_offset += RECORDS_PER_PAGE + except Exception as e: + print(f'\t\tFailed to get {service_name} data from NYSDP: {e}') + break + if len(feature_set): + feature_sets[layer_id] = feature_set + return feature_sets + + +def create_vector_features(dataset, service_name=None, **kwargs): + VectorMapLayer.objects.filter(dataset=dataset).delete() + + feature_sets = fetch_vector_features(service_name=service_name) + vector_features = [] + for index, feature_set in feature_sets.items(): + map_layer = VectorMapLayer.objects.create(dataset=dataset, index=index) + for feature in feature_set: + vector_features.append( + VectorFeature( + map_layer=map_layer, + geometry=GEOSGeometry(json.dumps(feature['geometry'])), +
properties=feature['properties'], + ) + ) + VectorFeature.objects.bulk_create(vector_features) + + +def download_all_deduped_vector_features(**kwargs): + start = datetime.now() + include_services = kwargs.get('include_services', [ + "DistAssetsOverview", + "Electrification_Data", + "EV_Load_Serving_Capacity", + "Hosting_Capacity_Data", + "LSRV", + "NY_SubT_SDP" + ]) + downloads_folder = kwargs.get('downloads_folder') + if downloads_folder is None: + downloads_folder = Path(__file__).parent + else: + downloads_folder = Path(downloads_folder) + filename = downloads_folder / 'nyc' / 'network_basic_features.json' + if filename.exists(): + print('\t\tReading saved file of basic features.') + return geopandas.GeoDataFrame.from_file(filename) + + print('\t\tDownloading basic features from NYSDP.') + feature_sets = None + with ThreadPoolExecutor(max_workers=len(include_services)) as pool: + feature_sets = pool.map(fetch_vector_features, include_services) + + features = [] + for feature_set in feature_sets: + for set_id, feature_set in feature_set.items(): + for feature in feature_set: + properties = feature['properties'] + geometry = GEOSGeometry(json.dumps(feature['geometry'])) + geoms = [] + # split multilinestrings + if geometry.geom_type == 'MultiLineString': + geoms = [LineString(*line) for line in geometry.coords] + elif geometry.geom_type == 'LineString': + geoms = [geometry] + for geom in geoms: + features.append(dict( + type='Feature', + geometry=json.loads(geom.json), + properties=properties + )) + # normalize and eliminate duplicates + gdf = geopandas.GeoDataFrame.from_features(features, crs='EPSG:4326') + gdf["geometry"] = gdf.normalize() + gdf = gdf.groupby(gdf.geometry.to_wkt()).first() + gdf.reset_index(inplace=True) + + gdf.to_file(filename) + print(f'\t\tCompleted download in {(datetime.now() - start).total_seconds()} seconds.') + return gdf + + +def create_consolidated_network(dataset, **kwargs): + start = datetime.now() + Network.objects.filter(dataset=dataset).delete() + VectorMapLayer.objects.filter(dataset=dataset).delete() + gdf = download_all_deduped_vector_features(**kwargs) + + zones_dataset_name = kwargs.get('zones_dataset_name') + if zones_dataset_name is None: + raise ValueError('`zones_dataset_name` is required.') + zones = SourceRegion.objects.filter(dataset__name=zones_dataset_name) + if zones.count() == 0: + raise ValueError(f'No regions found with dataset name "{zones_dataset_name}".') + + print(f'\t\tInterpreting networks from {len(gdf)} basic features...') + # Divide into groups + zone_geometries = [ + geopandas.GeoSeries.from_wkt([zone.boundary.wkt]).set_crs(4326).iloc[0] + for zone in zones + ] + groups = [ + gdf[gdf.geometry.covered_by(zone_geom)] + for zone_geom in zone_geometries + ] + groups = [g for g in groups if len(g) > 0] + print(f'\t\tSeparated into {len(groups)} groups.') + + with ThreadPoolExecutor(max_workers=10) as pool: + results = pool.map(interpret_group, groups) + for result in results: + nodes, edges = result + network = Network.objects.create(dataset=dataset) + NetworkNode.objects.bulk_create([ + NetworkNode( + network=network, + name=f'Node {i}', + location=Point(n.get('location').x, n.get('location').y), + metadata=n.get('metadata', {}) + ) + for i, n in enumerate(nodes) + ], batch_size=1000) + NetworkEdge.objects.bulk_create([ + NetworkEdge( + network=network, + name=f'Edge {i}', + from_node=NetworkNode.objects.get(network=network, location=Point(e.get('from_point').x, e.get('from_point').y)), +
to_node=NetworkNode.objects.get(network=network, location=Point(e.get('to_point').x, e.get('to_point').y)), + line_geometry=LineString(*e.get('line_geometry').coords), + metadata=e.get('metadata', {}) + ) + for i, e in enumerate(edges) + ], batch_size=1000) + + print(f'\t\t{network.nodes.count()} nodes created, {network.edges.count()} edges created.') + create_vector_features_from_network(network) + + print(f'\t\t{dataset.networks.count()} separate networks created.') + print(f'\tCompleted in {(datetime.now() - start).total_seconds()} seconds.') diff --git a/uvdat/core/management/commands/populate.py b/uvdat/core/management/commands/populate.py index 8c8abffa..f3f48573 100644 --- a/uvdat/core/management/commands/populate.py +++ b/uvdat/core/management/commands/populate.py @@ -1,12 +1,17 @@ from django.core.management.base import BaseCommand -from sample_data.ingest_sample_data import ingest_charts, ingest_contexts, ingest_datasets +from sample_data.ingest_use_case import ingest_use_case class Command(BaseCommand): requires_migrations_checks = True def add_arguments(self, parser): + parser.add_argument( + 'use_case', + choices=['boston_floods', 'new_york_energy'], + help='Sample data collection to load', + ) parser.add_argument( '--include_large', action='store_true', @@ -15,15 +20,14 @@ def add_arguments(self, parser): parser.add_argument('--dataset_indexes', nargs='*', type=int) def handle(self, *args, **kwargs): - print('Populating server with sample data...') + use_case_name = kwargs['use_case'] include_large = kwargs['include_large'] dataset_indexes = kwargs['dataset_indexes'] if dataset_indexes is None or len(dataset_indexes) == 0: dataset_indexes = None - ingest_datasets( + ingest_use_case( + use_case_name, include_large=include_large, dataset_indexes=dataset_indexes, ) - ingest_contexts() - ingest_charts() diff --git a/uvdat/core/tasks/networks.py b/uvdat/core/tasks/networks.py index b742d2ce..ddf78561 100644 --- a/uvdat/core/tasks/networks.py +++ b/uvdat/core/tasks/networks.py @@ -6,7 +6,7 @@ import numpy import shapely -from uvdat.core.models import Network, NetworkEdge, NetworkNode +from uvdat.core.models import Network, NetworkEdge, NetworkNode, VectorFeature, VectorMapLayer NODE_RECOVERY_MODES = [ 'random', @@ -213,6 +213,35 @@ def geojson_from_network(dataset): return new_geodata.to_json() +def create_vector_features_from_network(network): + map_layer, _ = VectorMapLayer.objects.get_or_create(dataset=network.dataset, index=0) + VectorFeature.objects.bulk_create( + [ + VectorFeature( + map_layer=map_layer, + geometry=node.location, + properties=dict(node_id=node.id, **node.metadata), + ) + for node in network.nodes.all() + ] + ) + VectorFeature.objects.bulk_create( + [ + VectorFeature( + map_layer=map_layer, + geometry=edge.line_geometry, + properties=dict( + edge_id=edge.id, + from_node_id=edge.from_node.id, + to_node_id=edge.to_node.id, + **edge.metadata, + ), + ) + for edge in network.edges.all() + ] + ) + + def get_network_graph(network): from uvdat.core.models import NetworkEdge, NetworkNode diff --git a/uvdat/core/tests/test_populate.py b/uvdat/core/tests/test_populate.py index f2bcb7c7..22021ea1 100644 --- a/uvdat/core/tests/test_populate.py +++ b/uvdat/core/tests/test_populate.py @@ -29,12 +29,13 @@ def test_populate(): call_command( 'populate', + 'boston_floods', include_large=True, dataset_indexes=dataset_indexes, ) assert Chart.objects.all().count() == 1 - assert Context.objects.all().count() == 3 + assert Context.objects.all().count() == 2 assert 
Dataset.objects.all().count() == 4 assert DerivedRegion.objects.all().count() == 0 assert FileItem.objects.all().count() == 7
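# A short sketch of driving the updated `populate` command from test or script code after this change (assumes the same Django test setup used by test_populate.py above; argument values are illustrative). The `use_case` positional argument is now required and must be 'boston_floods' or 'new_york_energy'.
from django.core.management import call_command
# Ingest the New York energy use case, converting datasets over 50 MB as well,
# and limiting ingestion to the first two entries of its datasets.json.
call_command('populate', 'new_york_energy', include_large=True, dataset_indexes=[0, 1])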