diff --git a/anyway/parsers/__init__.py b/anyway/parsers/__init__.py index ae7d8244..5e7bde6a 100644 --- a/anyway/parsers/__init__.py +++ b/anyway/parsers/__init__.py @@ -1,3 +1,12 @@ +resolution_to_distance = { + "מחוז": 5, + "נפה": 5, + "עיר": 5, + "כביש בינעירוני": 5, + "רחוב": 0.3, + "צומת עירוני": 0.3, + "צומת בינעירוני": 0.3, +} resolution_dict = { "מחוז": ["region_hebrew"], "נפה": ["district_hebrew"], diff --git a/anyway/parsers/injured_around_schools.py b/anyway/parsers/injured_around_schools.py index 7578a45d..577c4dbb 100644 --- a/anyway/parsers/injured_around_schools.py +++ b/anyway/parsers/injured_around_schools.py @@ -4,7 +4,6 @@ import shutil from datetime import datetime -import math import pandas as pd from sqlalchemy import or_, not_, and_ @@ -17,6 +16,7 @@ InjuredAroundSchoolAllData, ) from anyway.utilities import time_delta, chunks +from anyway.parsers.utils import get_bounding_box_polygon from anyway.app_and_db import db SUBTYPE_ACCIDENT_WITH_PEDESTRIAN = 1 @@ -32,37 +32,13 @@ DATE_URL_FORMAT = "%Y-%m-%d" -def get_bounding_box(latitude, longitude, distance_in_km): - latitude = math.radians(latitude) - longitude = math.radians(longitude) - - radius = 6371 - # Radius of the parallel at given latitude - parallel_radius = radius * math.cos(latitude) - - lat_min = latitude - distance_in_km / radius - lat_max = latitude + distance_in_km / radius - lon_min = longitude - distance_in_km / parallel_radius - lon_max = longitude + distance_in_km / parallel_radius - rad2deg = math.degrees - - return rad2deg(lat_min), rad2deg(lon_min), rad2deg(lat_max), rad2deg(lon_max) - - def acc_inv_query(longitude, latitude, distance, start_date, end_date, school): - lat_min, lon_min, lat_max, lon_max = get_bounding_box(latitude, longitude, distance) - baseX = lon_min - baseY = lat_min - distanceX = lon_max - distanceY = lat_max - pol_str = "POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format( - baseX, baseY, distanceX, distanceY - ) + polygon_str = get_bounding_box_polygon(latitude, longitude, distance) query_obj = ( db.session.query(Involved, AccidentMarker) .join(AccidentMarker, AccidentMarker.provider_and_id == Involved.provider_and_id) - .filter(AccidentMarker.geom.intersects(pol_str)) + .filter(AccidentMarker.geom.intersects(polygon_str)) .filter(Involved.injured_type == INJURED_TYPE_PEDESTRIAN) .filter(AccidentMarker.provider_and_id == Involved.provider_and_id) .filter( diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index adb2ce39..cd5f3d21 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -1,3 +1,4 @@ +from datetime import timedelta import logging import re @@ -6,10 +7,13 @@ import numpy as np from geographiclib.geodesic import Geodesic -from anyway.models import NewsFlash -from anyway.parsers import resolution_dict +from anyway.models import NewsFlash, WazeAlert +from anyway.parsers import resolution_dict, resolution_to_distance +from anyway.parsers.utils import get_bounding_box_polygon from anyway import secrets +WAZE_ALERT_NEWSFLASH_TIME_DELTA = timedelta(hours=3) + def extract_road_number(location): """ @@ -297,15 +301,55 @@ def extract_location_text(text): return text +def get_related_waze_accident_alert(db, geo_location, newsflash): + + # determine what distance (in kilometers) to look for waze accidents in, according to the newsflash's resolution + distance = resolution_to_distance.get(newsflash.resolution, None) + if distance is None: + # unknown resolution. skip this optimization + return None + + # create the bounding box according to the coordinate we have, and the resolution distance + bounding_box_polygon_str = get_bounding_box_polygon( + geo_location["lat"], geo_location["lon"], distance + ) + + # find waze alerts in that bounding box, from the recent time delta - and return the first as the related waze alert + matching_alert = ( + db.session.query(WazeAlert) + .filter(WazeAlert.alert_type == "ACCIDENT") + .filter( + WazeAlert.created_at.between( + newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, newsflash.date + ) + ) + .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) + .first() + ) + + return matching_alert + + def extract_geo_features(db, newsflash: NewsFlash) -> None: newsflash.location = extract_location_text(newsflash.description) or extract_location_text( newsflash.title ) geo_location = geocode_extract(newsflash.location) if geo_location is not None: + newsflash.resolution = set_accident_resolution(geo_location) + newsflash.lat = geo_location["geom"]["lat"] newsflash.lon = geo_location["geom"]["lng"] - newsflash.resolution = set_accident_resolution(geo_location) + + # improve location using waze + related_waze_accident = get_related_waze_accident_alert(db, geo_location, newsflash) + if related_waze_accident: + newsflash.waze_alert = related_waze_accident.id + + # TODO: uncomment this after testing the related waze accidents mechanism is working properly on real data + # newsflash.lat = related_waze_accident.latitude + # newsflash.lon = related_waze_accident.longitude + location_from_db = get_db_matching_location( db, newsflash.lat, diff --git a/anyway/parsers/utils.py b/anyway/parsers/utils.py index ab0f9e4b..7321f437 100644 --- a/anyway/parsers/utils.py +++ b/anyway/parsers/utils.py @@ -1,3 +1,6 @@ +import math + + def batch_iterator(iterable, batch_size): iterator = iter(iterable) iteration_stopped = False @@ -14,3 +17,25 @@ def batch_iterator(iterable, batch_size): yield batch if iteration_stopped: break + + +def get_bounding_box_polygon(latitude, longitude, distance_in_km): + latitude = math.radians(latitude) + longitude = math.radians(longitude) + + radius = 6371 + # Radius of the parallel at given latitude + parallel_radius = radius * math.cos(latitude) + + lat_min = latitude - distance_in_km / radius + lat_max = latitude + distance_in_km / radius + lon_min = longitude - distance_in_km / parallel_radius + lon_max = longitude + distance_in_km / parallel_radius + + rad2deg = math.degrees + baseX = rad2deg(lon_min) + baseY = rad2deg(lat_min) + distanceX = rad2deg(lon_max) + distanceY = rad2deg(lat_max) + + return f"POLYGON(({baseX} {baseY},{baseX} {distanceY},{distanceX} {distanceY},{distanceX} {baseY},{baseX} {baseY}))" diff --git a/tests/test_news_flash.py b/tests/test_news_flash.py index 852a8a38..38f18905 100755 --- a/tests/test_news_flash.py +++ b/tests/test_news_flash.py @@ -1,5 +1,6 @@ import datetime import json +from unittest.mock import Mock import pytest @@ -7,7 +8,7 @@ from anyway.parsers.news_flash_classifiers import classify_tweets, classify_rss from anyway import secrets from anyway.parsers.news_flash_db_adapter import init_db -from anyway.models import NewsFlash +from anyway.models import NewsFlash, WazeAlert from anyway.parsers import timezones from anyway.parsers.infographics_data_cache_updater import is_cache_eligible, is_in_cache @@ -189,6 +190,24 @@ def test_extract_location(): date=datetime.datetime(2020, 4, 22, 19, 39, 51), accident=True, ) + waze_alert = WazeAlert( + id='some-waze-alert-id', + city='באר שבע', + confidence=2, + created_at=datetime.datetime.now(), + longitude=32.1, + latitude=34.9, + magvar=190, + number_thumbs_up=1, + report_rating=5, + reliability=10, + alert_type='ACCIDENT', + alert_subtype='', + street='דרך מצדה', + road_type=3, + ) + location_extraction.get_related_waze_accident_alert = Mock(return_value=waze_alert) + expected = NewsFlash( **parsed, lat=32.0861791, @@ -204,6 +223,7 @@ def test_extract_location(): street1_hebrew="ביאליק", street2_hebrew=None, yishuv_name="רמת גן", + waze_alert=waze_alert.id ) actual = NewsFlash(**parsed) @@ -220,12 +240,11 @@ def test_extract_location_text(): ), ( 'רוכב אופנוע בן 23 נפצע היום (שבת) באורח בינוני לאחר שהחליק בכביש ליד כפר חיטים הסמוך לטבריה. צוות מד"א העניק לו טיפול ראשוני ופינה אותו לבית החולים פוריה בטבריה.]]>' - ,'כביש ליד כפר חיטים הסמוך לטבריה' - + , 'כביש ליד כפר חיטים הסמוך לטבריה' ), ( 'רוכב אופנוע בן 23 החליק הלילה (שבת) בנסיעה בכביש 3 סמוך למושב בקוע, ליד בית שמש. מצבו מוגדר בינוני. צוות מד"א העניק לו טיפול רפואי ופינה אותו עם חבלה רב מערכתית לבית החולים שמיר אסף הרופא בבאר יעקב.]]>' - ,'כביש 3 סמוך למושב בקוע, ליד בית שמש' + , 'כביש 3 סמוך למושב בקוע, ליד בית שמש' ), ]: actual_location_text = location_extraction.extract_location_text(description) @@ -268,3 +287,4 @@ def test_classification_statistics_ynet(): assert precision > BEST_PRECISION_YNET assert recall > BEST_RECALL_YNET assert f1 > BEST_F1_YNET +