From 02e0f388a7007c231fa3917154119f0437362112 Mon Sep 17 00:00:00 2001 From: sahare92 Date: Tue, 20 Oct 2020 19:07:50 +0300 Subject: [PATCH 1/7] added logic skeleton --- anyway/parsers/injured_around_schools.py | 30 +++------------------ anyway/parsers/location_extraction.py | 33 +++++++++++++++++++++--- anyway/parsers/utils.py | 27 +++++++++++++++++++ 3 files changed, 60 insertions(+), 30 deletions(-) diff --git a/anyway/parsers/injured_around_schools.py b/anyway/parsers/injured_around_schools.py index 7578a45db..577c4dbbd 100644 --- a/anyway/parsers/injured_around_schools.py +++ b/anyway/parsers/injured_around_schools.py @@ -4,7 +4,6 @@ import shutil from datetime import datetime -import math import pandas as pd from sqlalchemy import or_, not_, and_ @@ -17,6 +16,7 @@ InjuredAroundSchoolAllData, ) from anyway.utilities import time_delta, chunks +from anyway.parsers.utils import get_bounding_box_polygon from anyway.app_and_db import db SUBTYPE_ACCIDENT_WITH_PEDESTRIAN = 1 @@ -32,37 +32,13 @@ DATE_URL_FORMAT = "%Y-%m-%d" -def get_bounding_box(latitude, longitude, distance_in_km): - latitude = math.radians(latitude) - longitude = math.radians(longitude) - - radius = 6371 - # Radius of the parallel at given latitude - parallel_radius = radius * math.cos(latitude) - - lat_min = latitude - distance_in_km / radius - lat_max = latitude + distance_in_km / radius - lon_min = longitude - distance_in_km / parallel_radius - lon_max = longitude + distance_in_km / parallel_radius - rad2deg = math.degrees - - return rad2deg(lat_min), rad2deg(lon_min), rad2deg(lat_max), rad2deg(lon_max) - - def acc_inv_query(longitude, latitude, distance, start_date, end_date, school): - lat_min, lon_min, lat_max, lon_max = get_bounding_box(latitude, longitude, distance) - baseX = lon_min - baseY = lat_min - distanceX = lon_max - distanceY = lat_max - pol_str = "POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format( - baseX, baseY, distanceX, distanceY - ) + polygon_str = get_bounding_box_polygon(latitude, longitude, distance) query_obj = ( db.session.query(Involved, AccidentMarker) .join(AccidentMarker, AccidentMarker.provider_and_id == Involved.provider_and_id) - .filter(AccidentMarker.geom.intersects(pol_str)) + .filter(AccidentMarker.geom.intersects(polygon_str)) .filter(Involved.injured_type == INJURED_TYPE_PEDESTRIAN) .filter(AccidentMarker.provider_and_id == Involved.provider_and_id) .filter( diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index 51c3573f2..48db8703e 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -1,3 +1,4 @@ +from datetime import datetime, timedelta import logging import re @@ -6,8 +7,9 @@ import numpy as np from geographiclib.geodesic import Geodesic -from anyway.models import NewsFlash +from anyway.models import NewsFlash, WazeAlert from anyway.parsers import resolution_dict +from anyway.parsers.utils import get_bounding_box_polygon from anyway import secrets @@ -297,15 +299,40 @@ def extract_location_text(text): return text +def get_closest_waze_alert_accident_coordinates(db, geo_location, resolution) -> (float, float): + + # TODO: choose distance according to resolution + distance = 1 + + bounding_box_polygon_str = get_bounding_box_polygon(geo_location["lat"], geo_location["lon"], distance) + + # TODO: filter by time of the news-flash + # .filter(WazeAlert.created_at.between(datetime.now() - timedelta(hours=1), datetime.now())) \ + matching_alert = db.session.query(WazeAlert) \ + .filter(WazeAlert.alert_type == "ACCIDENT") \ + .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) \ + .first() + + return matching_alert + + def extract_geo_features(db, newsflash: NewsFlash) -> None: newsflash.location = extract_location_text(newsflash.description) or extract_location_text( newsflash.title ) geo_location = geocode_extract(newsflash.location) if geo_location is not None: - newsflash.lat = geo_location["geom"]["lat"] - newsflash.lon = geo_location["geom"]["lng"] newsflash.resolution = set_accident_resolution(geo_location) + + # improve location using waze + related_waze_accident = get_closest_waze_alert_accident_coordinates(db, geo_location, newsflash.resolution) + if related_waze_accident: + newsflash.lat = related_waze_accident["lat"] + newsflash.lon = related_waze_accident["lon"] + else: + newsflash.lat = geo_location["geom"]["lat"] + newsflash.lon = geo_location["geom"]["lng"] + location_from_db = get_db_matching_location( db, newsflash.lat, diff --git a/anyway/parsers/utils.py b/anyway/parsers/utils.py index ab0f9e4bc..91b37a8c5 100644 --- a/anyway/parsers/utils.py +++ b/anyway/parsers/utils.py @@ -1,3 +1,6 @@ +import math + + def batch_iterator(iterable, batch_size): iterator = iter(iterable) iteration_stopped = False @@ -14,3 +17,27 @@ def batch_iterator(iterable, batch_size): yield batch if iteration_stopped: break + + +def get_bounding_box_polygon(latitude, longitude, distance_in_km): + latitude = math.radians(latitude) + longitude = math.radians(longitude) + + radius = 6371 + # Radius of the parallel at given latitude + parallel_radius = radius * math.cos(latitude) + + lat_min = latitude - distance_in_km / radius + lat_max = latitude + distance_in_km / radius + lon_min = longitude - distance_in_km / parallel_radius + lon_max = longitude + distance_in_km / parallel_radius + rad2deg = math.degrees + + baseX = rad2deg(lon_min) + baseY = rad2deg(lat_min) + distanceX = rad2deg(lon_max) + distanceY = rad2deg(lat_max) + + return "POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format( + baseX, baseY, distanceX, distanceY + ) From 55a4beecb71154ed8be5cb84cedfdec03fc52fb3 Mon Sep 17 00:00:00 2001 From: sahare92 Date: Tue, 20 Oct 2020 22:13:58 +0300 Subject: [PATCH 2/7] added mechanism to get the related waze accident --- anyway/parsers/__init__.py | 2 + anyway/parsers/location_extraction.py | 37 +++++++---- tests/test_news_flash.py | 93 ++++++++++++++++++++++++++- 3 files changed, 118 insertions(+), 14 deletions(-) diff --git a/anyway/parsers/__init__.py b/anyway/parsers/__init__.py index ae7d8244d..2779296ba 100644 --- a/anyway/parsers/__init__.py +++ b/anyway/parsers/__init__.py @@ -1,3 +1,5 @@ +short_distance_resolutions = ['צומת עירוני', 'צומת בינעירוני', 'רחוב'] +long_distance_resolutions = ['עיר', 'נפה', 'מחוז', 'כביש בינעירוני'] resolution_dict = { "מחוז": ["region_hebrew"], "נפה": ["district_hebrew"], diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index c402fd02c..80ff32338 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -8,10 +8,11 @@ from geographiclib.geodesic import Geodesic from anyway.models import NewsFlash, WazeAlert -from anyway.parsers import resolution_dict +from anyway.parsers import resolution_dict, short_distance_resolutions, long_distance_resolutions from anyway.parsers.utils import get_bounding_box_polygon from anyway import secrets +WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS = 3 def extract_road_number(location): """ @@ -299,17 +300,26 @@ def extract_location_text(text): return text -def get_closest_waze_alert_accident_coordinates(db, geo_location, resolution) -> (float, float): +def get_related_waze_accident_alert(db, geo_location, newsflash): - # TODO: choose distance according to resolution - distance = 1 + # determine what distance (in kilometers) to look for accidents in, according to the accident resolution + if newsflash.resolution in short_distance_resolutions: + distance = 0.3 + elif newsflash.resolution in long_distance_resolutions: + distance = 5 + else: + # unknown resolution - skip this optimization + return None + + # create the bounding box according to the coordinate we have, and the resolution distance bounding_box_polygon_str = get_bounding_box_polygon(geo_location["lat"], geo_location["lon"], distance) - # TODO: filter by time of the news-flash - # .filter(WazeAlert.created_at.between(datetime.now() - timedelta(hours=1), datetime.now())) \ + # find waze alerts in that bounding box, from the recent time delta - and return the first as the related waze alert matching_alert = db.session.query(WazeAlert) \ .filter(WazeAlert.alert_type == "ACCIDENT") \ + .filter(WazeAlert.created_at.between(newsflash.date - timedelta(hours=WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS), + datetime.now())) \ .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) \ .first() @@ -324,14 +334,17 @@ def extract_geo_features(db, newsflash: NewsFlash) -> None: if geo_location is not None: newsflash.resolution = set_accident_resolution(geo_location) + newsflash.lat = geo_location["geom"]["lat"] + newsflash.lon = geo_location["geom"]["lng"] + # improve location using waze - related_waze_accident = get_closest_waze_alert_accident_coordinates(db, geo_location, newsflash.resolution) + related_waze_accident = get_related_waze_accident_alert(db, geo_location, newsflash) if related_waze_accident: - newsflash.lat = related_waze_accident["lat"] - newsflash.lon = related_waze_accident["lon"] - else: - newsflash.lat = geo_location["geom"]["lat"] - newsflash.lon = geo_location["geom"]["lng"] + newsflash.waze_alert = related_waze_accident.id + + # TODO: uncomment this after testing the related waze accidents mechanism is working properly on real data + # newsflash.lat = related_waze_accident.latitude + # newsflash.lon = related_waze_accident.longitude location_from_db = get_db_matching_location( db, diff --git a/tests/test_news_flash.py b/tests/test_news_flash.py index 852a8a38b..73af91c31 100755 --- a/tests/test_news_flash.py +++ b/tests/test_news_flash.py @@ -3,12 +3,13 @@ import pytest +from anyway.app_and_db import db from anyway.parsers import rss_sites, twitter, location_extraction from anyway.parsers.news_flash_classifiers import classify_tweets, classify_rss from anyway import secrets from anyway.parsers.news_flash_db_adapter import init_db -from anyway.models import NewsFlash -from anyway.parsers import timezones +from anyway.models import NewsFlash, WazeAlert +from anyway.parsers import timezones, short_distance_resolutions, long_distance_resolutions from anyway.parsers.infographics_data_cache_updater import is_cache_eligible, is_in_cache @@ -232,6 +233,57 @@ def test_extract_location_text(): assert expected_location_text == actual_location_text +def test_waze_alert(): + + # create a waze alert + waze_alert = _create_waze_accident_alert() + + try: + newsflash = NewsFlash(date=datetime.datetime.now()) + + # set the geo_location to be close to the waze accident alert location + geo_location = { + "lon": waze_alert.longitude + 0.001, + "lat": waze_alert.latitude + 0.0001, + } + + # check that we successfully get the related waze accident event + for resolution in short_distance_resolutions: + newsflash.resolution = resolution + related_waze_accident_alert = location_extraction.get_related_waze_accident_alert(db, + geo_location, + newsflash) + + assert waze_alert == related_waze_accident_alert + + # set geo_location to a further location + geo_location = { + "lon": waze_alert.longitude + 0.01, + "lat": waze_alert.latitude + 0.0001, + } + + # make sure short_distance_resolutions *do not* get any waze accident alert + for resolution in short_distance_resolutions: + newsflash.resolution = resolution + related_waze_accident_alert = location_extraction.get_related_waze_accident_alert(db, + geo_location, + newsflash) + + assert related_waze_accident_alert is None + + # make sure we successfully get the related waze accident for long_distance_resolutions + for resolution in long_distance_resolutions: + newsflash.resolution = resolution + related_waze_accident_alert = location_extraction.get_related_waze_accident_alert(db, + geo_location, + newsflash) + + assert waze_alert == related_waze_accident_alert + + finally: + _delete_waze_alert(waze_alert.id) + + def test_timeparse(): twitter = timezones.parse_creation_datetime("Sun May 31 08:26:18 +0000 2020") ynet = timezones.parse_creation_datetime("Sun, 31 May 2020 11:26:18 +0300") @@ -268,3 +320,40 @@ def test_classification_statistics_ynet(): assert precision > BEST_PRECISION_YNET assert recall > BEST_RECALL_YNET assert f1 > BEST_F1_YNET + + +def _create_waze_accident_alert(): + id = db.session.query(WazeAlert).count() + 1, + + longitude, latitude = ( + float(31.0), + float(34.0), + ) + point_str = "POINT({0} {1})".format(longitude, latitude) + + waze_alert = WazeAlert( + id=id[0], + city='באר שבע', + confidence=2, + created_at=datetime.datetime.now(), + longitude=longitude, + latitude=latitude, + magvar=190, + number_thumbs_up=1, + report_rating=5, + reliability=10, + alert_type='ACCIDENT', + alert_subtype='', + street='דרך מצדה', + road_type=3, + geom=point_str, + ) + db.session.add(waze_alert) + db.session.commit() + + return waze_alert + + +def _delete_waze_alert(waze_alert_id): + db.session.query(WazeAlert).filter_by(id=waze_alert_id).delete() + db.session.commit() From 570db171ca07470202aaae95cf3519ae63086ecd Mon Sep 17 00:00:00 2001 From: sahare92 Date: Tue, 20 Oct 2020 22:32:01 +0300 Subject: [PATCH 3/7] black lint --- anyway/parsers/__init__.py | 4 ++-- anyway/parsers/location_extraction.py | 23 ++++++++++++++++------- anyway/parsers/waze/waze_data_parser.py | 10 +++++++--- anyway/parsers/waze/waze_db_functions.py | 6 ++++-- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/anyway/parsers/__init__.py b/anyway/parsers/__init__.py index 2779296ba..afe6a766e 100644 --- a/anyway/parsers/__init__.py +++ b/anyway/parsers/__init__.py @@ -1,5 +1,5 @@ -short_distance_resolutions = ['צומת עירוני', 'צומת בינעירוני', 'רחוב'] -long_distance_resolutions = ['עיר', 'נפה', 'מחוז', 'כביש בינעירוני'] +short_distance_resolutions = ["צומת עירוני", "צומת בינעירוני", "רחוב"] +long_distance_resolutions = ["עיר", "נפה", "מחוז", "כביש בינעירוני"] resolution_dict = { "מחוז": ["region_hebrew"], "נפה": ["district_hebrew"], diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index 80ff32338..2d660c50a 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -14,6 +14,7 @@ WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS = 3 + def extract_road_number(location): """ extract road number from location if exist @@ -302,7 +303,7 @@ def extract_location_text(text): def get_related_waze_accident_alert(db, geo_location, newsflash): - # determine what distance (in kilometers) to look for accidents in, according to the accident resolution + # determine what distance (in kilometers) to look for waze accidents in, according to the newsflash's resolution if newsflash.resolution in short_distance_resolutions: distance = 0.3 elif newsflash.resolution in long_distance_resolutions: @@ -313,15 +314,23 @@ def get_related_waze_accident_alert(db, geo_location, newsflash): return None # create the bounding box according to the coordinate we have, and the resolution distance - bounding_box_polygon_str = get_bounding_box_polygon(geo_location["lat"], geo_location["lon"], distance) + bounding_box_polygon_str = get_bounding_box_polygon( + geo_location["lat"], geo_location["lon"], distance + ) # find waze alerts in that bounding box, from the recent time delta - and return the first as the related waze alert - matching_alert = db.session.query(WazeAlert) \ - .filter(WazeAlert.alert_type == "ACCIDENT") \ - .filter(WazeAlert.created_at.between(newsflash.date - timedelta(hours=WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS), - datetime.now())) \ - .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) \ + matching_alert = ( + db.session.query(WazeAlert) + .filter(WazeAlert.alert_type == "ACCIDENT") + .filter( + WazeAlert.created_at.between( + newsflash.date - timedelta(hours=WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS), + datetime.now(), + ) + ) + .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) .first() + ) return matching_alert diff --git a/anyway/parsers/waze/waze_data_parser.py b/anyway/parsers/waze/waze_data_parser.py index 05ebfb8f3..166a44d35 100644 --- a/anyway/parsers/waze/waze_data_parser.py +++ b/anyway/parsers/waze/waze_data_parser.py @@ -73,9 +73,11 @@ def parse_waze_alerts_data(waze_alerts): ) waze_df["road_type"] = int(waze_df["road_type"].fillna(-1)[0]) waze_df["number_thumbs_up"] = int(waze_df.get("number_thumbs_up").fillna(0)[0]) - waze_df["report_by_municipality_user"] = _convert_to_bool(waze_df.get("report_by_municipality_user", False)) + waze_df["report_by_municipality_user"] = _convert_to_bool( + waze_df.get("report_by_municipality_user", False) + ) - waze_df.drop(["country", "pubMillis"], axis=1, inplace=True, errors='ignore') + waze_df.drop(["country", "pubMillis"], axis=1, inplace=True, errors="ignore") for key in waze_df.keys(): if waze_df[key] is None or key not in [field.name for field in WazeAlert.__table__.columns]: waze_df.drop([key], axis=1, inplace=True) @@ -112,7 +114,9 @@ def parse_waze_traffic_jams_data(waze_jams): inplace=True, ) for key in waze_df.keys(): - if waze_df[key] is None or key not in [field.name for field in WazeTrafficJams.__table__.columns]: + if waze_df[key] is None or key not in [ + field.name for field in WazeTrafficJams.__table__.columns + ]: waze_df.drop([key], axis=1, inplace=True) return waze_df.to_dict("records") diff --git a/anyway/parsers/waze/waze_db_functions.py b/anyway/parsers/waze/waze_db_functions.py index a356f9a82..bfe3f9254 100644 --- a/anyway/parsers/waze/waze_db_functions.py +++ b/anyway/parsers/waze/waze_db_functions.py @@ -25,7 +25,9 @@ def _upsert_waze_objects_by_uuid(model, waze_objects): with db.session.no_autoflush: for waze_object in waze_objects: db.session.flush() - existing_objects = db.session.query(model).filter(model.uuid == str(waze_object["uuid"])) + existing_objects = db.session.query(model).filter( + model.uuid == str(waze_object["uuid"]) + ) object_count = existing_objects.count() if object_count == 0: new_object = model(**waze_object) @@ -34,7 +36,7 @@ def _upsert_waze_objects_by_uuid(model, waze_objects): elif object_count > 1: # sanity: as the uuid field is unique - this should never happen - raise RuntimeError('Too many waze objects with the same uuid') + raise RuntimeError("Too many waze objects with the same uuid") else: # update the existing alert From dd2b36a595c0e7ddd77ab1c41154f9fc6090fe06 Mon Sep 17 00:00:00 2001 From: sahare92 Date: Wed, 21 Oct 2020 15:30:30 +0300 Subject: [PATCH 4/7] fix review stuff --- anyway/parsers/__init__.py | 11 +++++++++-- anyway/parsers/location_extraction.py | 12 ++++-------- anyway/parsers/utils.py | 6 ++---- tests/test_news_flash.py | 24 +++++++++++++++--------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/anyway/parsers/__init__.py b/anyway/parsers/__init__.py index afe6a766e..5e7bde6a1 100644 --- a/anyway/parsers/__init__.py +++ b/anyway/parsers/__init__.py @@ -1,5 +1,12 @@ -short_distance_resolutions = ["צומת עירוני", "צומת בינעירוני", "רחוב"] -long_distance_resolutions = ["עיר", "נפה", "מחוז", "כביש בינעירוני"] +resolution_to_distance = { + "מחוז": 5, + "נפה": 5, + "עיר": 5, + "כביש בינעירוני": 5, + "רחוב": 0.3, + "צומת עירוני": 0.3, + "צומת בינעירוני": 0.3, +} resolution_dict = { "מחוז": ["region_hebrew"], "נפה": ["district_hebrew"], diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index 2d660c50a..327c6db54 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -8,7 +8,7 @@ from geographiclib.geodesic import Geodesic from anyway.models import NewsFlash, WazeAlert -from anyway.parsers import resolution_dict, short_distance_resolutions, long_distance_resolutions +from anyway.parsers import resolution_dict, resolution_to_distance from anyway.parsers.utils import get_bounding_box_polygon from anyway import secrets @@ -304,13 +304,9 @@ def extract_location_text(text): def get_related_waze_accident_alert(db, geo_location, newsflash): # determine what distance (in kilometers) to look for waze accidents in, according to the newsflash's resolution - if newsflash.resolution in short_distance_resolutions: - distance = 0.3 - elif newsflash.resolution in long_distance_resolutions: - distance = 5 - else: - - # unknown resolution - skip this optimization + distance = resolution_to_distance.get(newsflash.resolution, None) + if distance is None: + # unknown resolution. skip this optimization return None # create the bounding box according to the coordinate we have, and the resolution distance diff --git a/anyway/parsers/utils.py b/anyway/parsers/utils.py index 91b37a8c5..7321f4375 100644 --- a/anyway/parsers/utils.py +++ b/anyway/parsers/utils.py @@ -31,13 +31,11 @@ def get_bounding_box_polygon(latitude, longitude, distance_in_km): lat_max = latitude + distance_in_km / radius lon_min = longitude - distance_in_km / parallel_radius lon_max = longitude + distance_in_km / parallel_radius - rad2deg = math.degrees + rad2deg = math.degrees baseX = rad2deg(lon_min) baseY = rad2deg(lat_min) distanceX = rad2deg(lon_max) distanceY = rad2deg(lat_max) - return "POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format( - baseX, baseY, distanceX, distanceY - ) + return f"POLYGON(({baseX} {baseY},{baseX} {distanceY},{distanceX} {distanceY},{distanceX} {baseY},{baseX} {baseY}))" diff --git a/tests/test_news_flash.py b/tests/test_news_flash.py index 73af91c31..098f14a3d 100755 --- a/tests/test_news_flash.py +++ b/tests/test_news_flash.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager import datetime import json @@ -9,7 +10,7 @@ from anyway import secrets from anyway.parsers.news_flash_db_adapter import init_db from anyway.models import NewsFlash, WazeAlert -from anyway.parsers import timezones, short_distance_resolutions, long_distance_resolutions +from anyway.parsers import timezones from anyway.parsers.infographics_data_cache_updater import is_cache_eligible, is_in_cache @@ -234,13 +235,12 @@ def test_extract_location_text(): def test_waze_alert(): - - # create a waze alert - waze_alert = _create_waze_accident_alert() - - try: + with _managed_waze_accident_alert() as waze_alert: newsflash = NewsFlash(date=datetime.datetime.now()) + short_distance_resolutions = ["צומת עירוני", "צומת בינעירוני", "רחוב"] + long_distance_resolutions = ["עיר", "נפה", "מחוז", "כביש בינעירוני"] + # set the geo_location to be close to the waze accident alert location geo_location = { "lon": waze_alert.longitude + 0.001, @@ -280,9 +280,6 @@ def test_waze_alert(): assert waze_alert == related_waze_accident_alert - finally: - _delete_waze_alert(waze_alert.id) - def test_timeparse(): twitter = timezones.parse_creation_datetime("Sun May 31 08:26:18 +0000 2020") @@ -322,6 +319,15 @@ def test_classification_statistics_ynet(): assert f1 > BEST_F1_YNET +@contextmanager +def _managed_waze_accident_alert(): + waze_alert = _create_waze_accident_alert() + try: + yield waze_alert + finally: + _delete_waze_alert(waze_alert.id) + + def _create_waze_accident_alert(): id = db.session.query(WazeAlert).count() + 1, From be81f6ea77bbc7f5297093c45256c3be7827d257 Mon Sep 17 00:00:00 2001 From: sahare92 Date: Thu, 22 Oct 2020 11:20:31 +0300 Subject: [PATCH 5/7] change test to unit test --- anyway/parsers/location_extraction.py | 9 +- tests/test_news_flash.py | 119 +++++--------------------- 2 files changed, 24 insertions(+), 104 deletions(-) diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index 327c6db54..c92adc4f7 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -12,7 +12,7 @@ from anyway.parsers.utils import get_bounding_box_polygon from anyway import secrets -WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS = 3 +WAZE_ALERT_NEWSFLASH_TIME_DELTA = timedelta(hours=3) def extract_road_number(location): @@ -318,12 +318,7 @@ def get_related_waze_accident_alert(db, geo_location, newsflash): matching_alert = ( db.session.query(WazeAlert) .filter(WazeAlert.alert_type == "ACCIDENT") - .filter( - WazeAlert.created_at.between( - newsflash.date - timedelta(hours=WAZE_ALERT_NEWSFLASH_DELTA_IN_HOURS), - datetime.now(), - ) - ) + .filter(WazeAlert.created_at.between(newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, datetime.now())) .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) .first() ) diff --git a/tests/test_news_flash.py b/tests/test_news_flash.py index 098f14a3d..38f189055 100755 --- a/tests/test_news_flash.py +++ b/tests/test_news_flash.py @@ -1,10 +1,9 @@ -from contextlib import contextmanager import datetime import json +from unittest.mock import Mock import pytest -from anyway.app_and_db import db from anyway.parsers import rss_sites, twitter, location_extraction from anyway.parsers.news_flash_classifiers import classify_tweets, classify_rss from anyway import secrets @@ -191,6 +190,24 @@ def test_extract_location(): date=datetime.datetime(2020, 4, 22, 19, 39, 51), accident=True, ) + waze_alert = WazeAlert( + id='some-waze-alert-id', + city='באר שבע', + confidence=2, + created_at=datetime.datetime.now(), + longitude=32.1, + latitude=34.9, + magvar=190, + number_thumbs_up=1, + report_rating=5, + reliability=10, + alert_type='ACCIDENT', + alert_subtype='', + street='דרך מצדה', + road_type=3, + ) + location_extraction.get_related_waze_accident_alert = Mock(return_value=waze_alert) + expected = NewsFlash( **parsed, lat=32.0861791, @@ -206,6 +223,7 @@ def test_extract_location(): street1_hebrew="ביאליק", street2_hebrew=None, yishuv_name="רמת גן", + waze_alert=waze_alert.id ) actual = NewsFlash(**parsed) @@ -222,65 +240,17 @@ def test_extract_location_text(): ), ( 'רוכב אופנוע בן 23 נפצע היום (שבת) באורח בינוני לאחר שהחליק בכביש ליד כפר חיטים הסמוך לטבריה. צוות מד"א העניק לו טיפול ראשוני ופינה אותו לבית החולים פוריה בטבריה.]]>' - ,'כביש ליד כפר חיטים הסמוך לטבריה' - + , 'כביש ליד כפר חיטים הסמוך לטבריה' ), ( 'רוכב אופנוע בן 23 החליק הלילה (שבת) בנסיעה בכביש 3 סמוך למושב בקוע, ליד בית שמש. מצבו מוגדר בינוני. צוות מד"א העניק לו טיפול רפואי ופינה אותו עם חבלה רב מערכתית לבית החולים שמיר אסף הרופא בבאר יעקב.]]>' - ,'כביש 3 סמוך למושב בקוע, ליד בית שמש' + , 'כביש 3 סמוך למושב בקוע, ליד בית שמש' ), ]: actual_location_text = location_extraction.extract_location_text(description) assert expected_location_text == actual_location_text -def test_waze_alert(): - with _managed_waze_accident_alert() as waze_alert: - newsflash = NewsFlash(date=datetime.datetime.now()) - - short_distance_resolutions = ["צומת עירוני", "צומת בינעירוני", "רחוב"] - long_distance_resolutions = ["עיר", "נפה", "מחוז", "כביש בינעירוני"] - - # set the geo_location to be close to the waze accident alert location - geo_location = { - "lon": waze_alert.longitude + 0.001, - "lat": waze_alert.latitude + 0.0001, - } - - # check that we successfully get the related waze accident event - for resolution in short_distance_resolutions: - newsflash.resolution = resolution - related_waze_accident_alert = location_extraction.get_related_waze_accident_alert(db, - geo_location, - newsflash) - - assert waze_alert == related_waze_accident_alert - - # set geo_location to a further location - geo_location = { - "lon": waze_alert.longitude + 0.01, - "lat": waze_alert.latitude + 0.0001, - } - - # make sure short_distance_resolutions *do not* get any waze accident alert - for resolution in short_distance_resolutions: - newsflash.resolution = resolution - related_waze_accident_alert = location_extraction.get_related_waze_accident_alert(db, - geo_location, - newsflash) - - assert related_waze_accident_alert is None - - # make sure we successfully get the related waze accident for long_distance_resolutions - for resolution in long_distance_resolutions: - newsflash.resolution = resolution - related_waze_accident_alert = location_extraction.get_related_waze_accident_alert(db, - geo_location, - newsflash) - - assert waze_alert == related_waze_accident_alert - - def test_timeparse(): twitter = timezones.parse_creation_datetime("Sun May 31 08:26:18 +0000 2020") ynet = timezones.parse_creation_datetime("Sun, 31 May 2020 11:26:18 +0300") @@ -318,48 +288,3 @@ def test_classification_statistics_ynet(): assert recall > BEST_RECALL_YNET assert f1 > BEST_F1_YNET - -@contextmanager -def _managed_waze_accident_alert(): - waze_alert = _create_waze_accident_alert() - try: - yield waze_alert - finally: - _delete_waze_alert(waze_alert.id) - - -def _create_waze_accident_alert(): - id = db.session.query(WazeAlert).count() + 1, - - longitude, latitude = ( - float(31.0), - float(34.0), - ) - point_str = "POINT({0} {1})".format(longitude, latitude) - - waze_alert = WazeAlert( - id=id[0], - city='באר שבע', - confidence=2, - created_at=datetime.datetime.now(), - longitude=longitude, - latitude=latitude, - magvar=190, - number_thumbs_up=1, - report_rating=5, - reliability=10, - alert_type='ACCIDENT', - alert_subtype='', - street='דרך מצדה', - road_type=3, - geom=point_str, - ) - db.session.add(waze_alert) - db.session.commit() - - return waze_alert - - -def _delete_waze_alert(waze_alert_id): - db.session.query(WazeAlert).filter_by(id=waze_alert_id).delete() - db.session.commit() From f7b383f54f6bef9876c9466493a45fb388aee5ac Mon Sep 17 00:00:00 2001 From: sahare92 Date: Thu, 22 Oct 2020 11:29:44 +0300 Subject: [PATCH 6/7] black lint --- anyway/parsers/location_extraction.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index c92adc4f7..dbf56b376 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -318,7 +318,11 @@ def get_related_waze_accident_alert(db, geo_location, newsflash): matching_alert = ( db.session.query(WazeAlert) .filter(WazeAlert.alert_type == "ACCIDENT") - .filter(WazeAlert.created_at.between(newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, datetime.now())) + .filter( + WazeAlert.created_at.between( + newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, datetime.now() + ) + ) .filter(WazeAlert.geom.intersects(bounding_box_polygon_str)) .first() ) From c9ef9322c6ceced2705accc04962902ce25c08bc Mon Sep 17 00:00:00 2001 From: sahare92 Date: Wed, 4 Nov 2020 19:08:20 +0200 Subject: [PATCH 7/7] fix waze alerts query time range --- anyway/parsers/location_extraction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/anyway/parsers/location_extraction.py b/anyway/parsers/location_extraction.py index dbf56b376..cd5f3d219 100644 --- a/anyway/parsers/location_extraction.py +++ b/anyway/parsers/location_extraction.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import timedelta import logging import re @@ -320,7 +320,7 @@ def get_related_waze_accident_alert(db, geo_location, newsflash): .filter(WazeAlert.alert_type == "ACCIDENT") .filter( WazeAlert.created_at.between( - newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, datetime.now() + newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, newsflash.date ) ) .filter(WazeAlert.geom.intersects(bounding_box_polygon_str))