Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More precise newsflash coordinates (using Waze data) #1552

Draft
wants to merge 10 commits into
base: dev
Choose a base branch
from
9 changes: 9 additions & 0 deletions anyway/parsers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Search radius, in kilometers, to use around a newsflash's coordinates,
# keyed by the newsflash's location resolution (Hebrew labels).
# Coarse resolutions get the wide radius, street/junction level the narrow one.
_WIDE_SEARCH_RADIUS_KM = 5
_NARROW_SEARCH_RADIUS_KM = 0.3
resolution_to_distance = {
    **dict.fromkeys(
        ("מחוז", "נפה", "עיר", "כביש בינעירוני"),
        _WIDE_SEARCH_RADIUS_KM,
    ),
    **dict.fromkeys(
        ("רחוב", "צומת עירוני", "צומת בינעירוני"),
        _NARROW_SEARCH_RADIUS_KM,
    ),
}
resolution_dict = {
"מחוז": ["region_hebrew"],
"נפה": ["district_hebrew"],
Expand Down
30 changes: 3 additions & 27 deletions anyway/parsers/injured_around_schools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import shutil
from datetime import datetime

import math
import pandas as pd
from sqlalchemy import or_, not_, and_

Expand All @@ -17,6 +16,7 @@
InjuredAroundSchoolAllData,
)
from anyway.utilities import time_delta, chunks
from anyway.parsers.utils import get_bounding_box_polygon
from anyway.app_and_db import db

SUBTYPE_ACCIDENT_WITH_PEDESTRIAN = 1
Expand All @@ -32,37 +32,13 @@
DATE_URL_FORMAT = "%Y-%m-%d"


def get_bounding_box(latitude, longitude, distance_in_km):
    """
    Compute a latitude/longitude bounding box around a point.

    The box extends ``distance_in_km`` north/south and east/west of the point,
    using a spherical approximation with a radius of 6371 km.

    :param latitude: latitude of the center point, in degrees
    :param longitude: longitude of the center point, in degrees
    :param distance_in_km: half-width of the box, in kilometers
    :return: tuple ``(lat_min, lon_min, lat_max, lon_max)`` in degrees,
             clamped to [-90, 90] for latitude and [-180, 180] for longitude
    """
    latitude = math.radians(latitude)
    longitude = math.radians(longitude)

    radius = 6371
    # Radius of the parallel at given latitude
    parallel_radius = radius * math.cos(latitude)

    lat_min = latitude - distance_in_km / radius
    lat_max = latitude + distance_in_km / radius
    lon_min = longitude - distance_in_km / parallel_radius
    lon_max = longitude + distance_in_km / parallel_radius
    rad2deg = math.degrees

    # Near the poles the parallel's radius approaches zero, so the longitude
    # offset diverges (and latitude can pass +/-90). Clamp to the valid
    # coordinate range; values already in range are returned untouched.
    return (
        min(max(rad2deg(lat_min), -90.0), 90.0),
        min(max(rad2deg(lon_min), -180.0), 180.0),
        min(max(rad2deg(lat_max), -90.0), 90.0),
        min(max(rad2deg(lon_max), -180.0), 180.0),
    )


def acc_inv_query(longitude, latitude, distance, start_date, end_date, school):
lat_min, lon_min, lat_max, lon_max = get_bounding_box(latitude, longitude, distance)
baseX = lon_min
baseY = lat_min
distanceX = lon_max
distanceY = lat_max
pol_str = "POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))".format(
baseX, baseY, distanceX, distanceY
)
polygon_str = get_bounding_box_polygon(latitude, longitude, distance)

query_obj = (
db.session.query(Involved, AccidentMarker)
.join(AccidentMarker, AccidentMarker.provider_and_id == Involved.provider_and_id)
.filter(AccidentMarker.geom.intersects(pol_str))
.filter(AccidentMarker.geom.intersects(polygon_str))
.filter(Involved.injured_type == INJURED_TYPE_PEDESTRIAN)
.filter(AccidentMarker.provider_and_id == Involved.provider_and_id)
.filter(
Expand Down
50 changes: 47 additions & 3 deletions anyway/parsers/location_extraction.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import timedelta
import logging
import re

Expand All @@ -6,10 +7,13 @@
import numpy as np
from geographiclib.geodesic import Geodesic

from anyway.models import NewsFlash
from anyway.parsers import resolution_dict
from anyway.models import NewsFlash, WazeAlert
from anyway.parsers import resolution_dict, resolution_to_distance
from anyway.parsers.utils import get_bounding_box_polygon
from anyway import secrets

# How far back before a newsflash's date to look for Waze accident alerts
# when searching for the alert related to that newsflash.
WAZE_ALERT_NEWSFLASH_TIME_DELTA = timedelta(hours=3)


def extract_road_number(location):
"""
Expand Down Expand Up @@ -297,15 +301,55 @@ def extract_location_text(text):
return text


def get_related_waze_accident_alert(db, geo_location, newsflash):
    """
    Find a Waze accident alert that plausibly describes the same event as a newsflash.

    :param db: database handle exposing ``session``
    :param geo_location: geocoding result for the newsflash; coordinates are read from
                         ``geo_location["geom"]`` (``lat`` / ``lng``), and a flat mapping
                         with ``lat`` and ``lon`` (or ``lng``) keys is accepted as well
    :param newsflash: the newsflash being located; its ``resolution`` and ``date`` are used
    :return: the first matching ``WazeAlert``, or ``None`` when the resolution is unknown
             or no alert matches
    """
    # determine what distance (in kilometers) to look for waze accidents in, according to the newsflash's resolution
    distance = resolution_to_distance.get(newsflash.resolution)
    if distance is None:
        # unknown resolution. skip this optimization
        return None

    # extract_geo_features reads the geocoded point from geo_location["geom"]["lat"/"lng"],
    # so look there first; indexing geo_location["lat"] directly raises KeyError on that shape.
    coordinates = geo_location.get("geom", geo_location)
    latitude = coordinates["lat"]
    longitude = coordinates["lng"] if "lng" in coordinates else coordinates["lon"]

    # create the bounding box according to the coordinate we have, and the resolution distance
    bounding_box_polygon_str = get_bounding_box_polygon(latitude, longitude, distance)

    # find waze alerts in that bounding box, from the recent time delta - and return the first as the related waze alert
    matching_alert = (
        db.session.query(WazeAlert)
        .filter(WazeAlert.alert_type == "ACCIDENT")
        .filter(
            WazeAlert.created_at.between(
                newsflash.date - WAZE_ALERT_NEWSFLASH_TIME_DELTA, newsflash.date
            )
        )
        .filter(WazeAlert.geom.intersects(bounding_box_polygon_str))
        .first()
    )

    return matching_alert


def extract_geo_features(db, newsflash: NewsFlash) -> None:
newsflash.location = extract_location_text(newsflash.description) or extract_location_text(
newsflash.title
)
geo_location = geocode_extract(newsflash.location)
if geo_location is not None:
newsflash.resolution = set_accident_resolution(geo_location)

newsflash.lat = geo_location["geom"]["lat"]
newsflash.lon = geo_location["geom"]["lng"]
newsflash.resolution = set_accident_resolution(geo_location)

# improve location using waze
related_waze_accident = get_related_waze_accident_alert(db, geo_location, newsflash)
if related_waze_accident:
newsflash.waze_alert = related_waze_accident.id

# TODO: uncomment this after testing the related waze accidents mechanism is working properly on real data
# newsflash.lat = related_waze_accident.latitude
# newsflash.lon = related_waze_accident.longitude

location_from_db = get_db_matching_location(
db,
newsflash.lat,
Expand Down
25 changes: 25 additions & 0 deletions anyway/parsers/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import math


def batch_iterator(iterable, batch_size):
iterator = iter(iterable)
iteration_stopped = False
Expand All @@ -14,3 +17,25 @@ def batch_iterator(iterable, batch_size):
yield batch
if iteration_stopped:
break


def get_bounding_box_polygon(latitude, longitude, distance_in_km):
    """
    Build a WKT polygon for the bounding box around a point.

    The box extends ``distance_in_km`` north/south and east/west of the point,
    using a spherical approximation with a radius of 6371 km.

    :param latitude: latitude of the center point, in degrees
    :param longitude: longitude of the center point, in degrees
    :param distance_in_km: half-width of the box, in kilometers
    :return: ``"POLYGON((lon lat,...))"`` string; the ring starts at the south-west
             corner, goes through north-west, north-east and south-east, and closes
             back on the south-west corner. Coordinates are clamped to [-90, 90]
             for latitude and [-180, 180] for longitude.
    """
    latitude = math.radians(latitude)
    longitude = math.radians(longitude)

    radius = 6371
    # Radius of the parallel at given latitude
    parallel_radius = radius * math.cos(latitude)

    lat_min = latitude - distance_in_km / radius
    lat_max = latitude + distance_in_km / radius
    lon_min = longitude - distance_in_km / parallel_radius
    lon_max = longitude + distance_in_km / parallel_radius

    # Near the poles the parallel's radius approaches zero, so the longitude
    # offset diverges (and latitude can pass +/-90). Clamp to the valid
    # coordinate range; values already in range are emitted untouched.
    rad2deg = math.degrees
    baseX = min(max(rad2deg(lon_min), -180.0), 180.0)
    baseY = min(max(rad2deg(lat_min), -90.0), 90.0)
    distanceX = min(max(rad2deg(lon_max), -180.0), 180.0)
    distanceY = min(max(rad2deg(lat_max), -90.0), 90.0)

    return f"POLYGON(({baseX} {baseY},{baseX} {distanceY},{distanceX} {distanceY},{distanceX} {baseY},{baseX} {baseY}))"
28 changes: 24 additions & 4 deletions tests/test_news_flash.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import datetime
import json
from unittest.mock import Mock

import pytest

from anyway.parsers import rss_sites, twitter, location_extraction
from anyway.parsers.news_flash_classifiers import classify_tweets, classify_rss
from anyway import secrets
from anyway.parsers.news_flash_db_adapter import init_db
from anyway.models import NewsFlash
from anyway.models import NewsFlash, WazeAlert
from anyway.parsers import timezones
from anyway.parsers.infographics_data_cache_updater import is_cache_eligible, is_in_cache

Expand Down Expand Up @@ -189,6 +190,24 @@ def test_extract_location():
date=datetime.datetime(2020, 4, 22, 19, 39, 51),
accident=True,
)
waze_alert = WazeAlert(
id='some-waze-alert-id',
city='באר שבע',
confidence=2,
created_at=datetime.datetime.now(),
longitude=32.1,
latitude=34.9,
magvar=190,
number_thumbs_up=1,
report_rating=5,
reliability=10,
alert_type='ACCIDENT',
alert_subtype='',
street='דרך מצדה',
road_type=3,
)
location_extraction.get_related_waze_accident_alert = Mock(return_value=waze_alert)

expected = NewsFlash(
**parsed,
lat=32.0861791,
Expand All @@ -204,6 +223,7 @@ def test_extract_location():
street1_hebrew="ביאליק",
street2_hebrew=None,
yishuv_name="רמת גן",
waze_alert=waze_alert.id
)

actual = NewsFlash(**parsed)
Expand All @@ -220,12 +240,11 @@ def test_extract_location_text():
),
(
'רוכב אופנוע בן 23 נפצע היום (שבת) באורח בינוני לאחר שהחליק בכביש ליד כפר חיטים הסמוך לטבריה. צוות מד"א העניק לו טיפול ראשוני ופינה אותו לבית החולים פוריה בטבריה.]]>'
,'כביש ליד כפר חיטים הסמוך לטבריה'

, 'כביש ליד כפר חיטים הסמוך לטבריה'
),
(
'רוכב אופנוע בן 23 החליק הלילה (שבת) בנסיעה בכביש 3 סמוך למושב בקוע, ליד בית שמש. מצבו מוגדר בינוני. צוות מד"א העניק לו טיפול רפואי ופינה אותו עם חבלה רב מערכתית לבית החולים שמיר אסף הרופא בבאר יעקב.]]>'
,'כביש 3 סמוך למושב בקוע, ליד בית שמש'
, 'כביש 3 סמוך למושב בקוע, ליד בית שמש'
),
]:
actual_location_text = location_extraction.extract_location_text(description)
Expand Down Expand Up @@ -268,3 +287,4 @@ def test_classification_statistics_ynet():
assert precision > BEST_PRECISION_YNET
assert recall > BEST_RECALL_YNET
assert f1 > BEST_F1_YNET