Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving dependence from custom branch's tour_model to master's trip_model #933

Merged
merged 10 commits into from
Sep 14, 2023
164 changes: 137 additions & 27 deletions emission/tests/modellingTests/modellingTestAssets.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,145 @@
import random
from typing import Optional, Tuple, List, Dict
from uuid import UUID
import emission.analysis.modelling.trip_model.trip_model as eamtm
import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
import emission.tests.modellingTests.modellingTestAssets as etmm
import emission.core.wrapper.confirmedtrip as ecwc

import emission.core.wrapper.entry as ecwe
import time
import math

def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
"""
TODO: Write about each parameter to the function
pass in a test configuration to the binning algorithm.

shankari marked this conversation as resolved.
Show resolved Hide resolved
clustering_way : Part of the trip used for checking pairwise proximity.
Can take one of the three values:

1. 'origin' -> using origin of the trip to check if 2 points
lie within the mentioned similarity_threshold_meters
2. 'destination' -> using destination of the trip to check if 2 points
lie within the mentioned similarity_threshold_meters
3. 'origin-destination' -> both origin and destination of the trip to check
if 2 points lie within the mentioned
similarity_threshold_meters
"""
model_config = {
"metric": metric,
"similarity_threshold_meters": threshold, # meters,
"apply_cutoff": cutoff,
"clustering_way": clustering_way,
"incremental_evaluation": incrementalevaluation
}

return eamtg.GreedySimilarityBinning(model_config)

def generate_random_point():
"""Generate a completetly random point valid WGS84 latitiude and longtidude"""
lat=random.uniform(-90,90)
lon=random.uniform(-180,180)
return [lat,lon]
shankari marked this conversation as resolved.
Show resolved Hide resolved

def generate_nearby_random_points(ref_coords,threshold):
"""
Generate valid WGS84 latitiude and longtidude in threshold(m) proximity to
ref coordinates
"""

thresholdInWGS84 = threshold* (0.000001/0.11)
dx=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
dy=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
return [ref_coords[0] +dx , ref_coords[1] +dy]
shankari marked this conversation as resolved.
Show resolved Hide resolved

def calDistanceTest(point1, point2, coordinates=False):
"""haversine distance

:param point1: a coordinate in degrees WGS84
:param point2: another coordinate in degrees WGS84
:param coordinates: if false, expect a list of coordinates, defaults to False
:return: distance approximately in meters
"""
earthRadius = 6371000 # meters
shankari marked this conversation as resolved.
Show resolved Hide resolved
if coordinates:
dLat = math.radians(point1.lat-point2.lat)
dLon = math.radians(point1.lon-point2.lon)
lat1 = math.radians(point1.lat)
lat2 = math.radians(point2.lat)
else:
dLat = math.radians(point1[1]-point2[1])
dLon = math.radians(point1[0]-point2[0])
lat1 = math.radians(point1[1])
lat2 = math.radians(point2[1])


a = (math.sin(dLat/2) ** 2) + ((math.sin(dLon/2) ** 2) * math.cos(lat1) * math.cos(lat2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
d = earthRadius * c

return d

def setTripConfig(trips,trip_part,threshold,within_thr=None,label_data=None):
shankari marked this conversation as resolved.
Show resolved Hide resolved
"""
TODO: Write about each parameter to the function
trip_part: when mock trips are generated, coordinates of this part of
m trips will be within the threshold. trip_part can take one
among the four values:

1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie
within the mentioned threshold when trips are generated),

2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned
threshold when trips are generated),

3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the
mentioned threshold when trips are generated)

4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
will lie within the mentioned threshold when trips are generated)
"""
if label_data == None:
label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}

trip =etmm.generate_mock_trips(
user_id="joe",
trips=trips,
trip_part=trip_part,
label_data=label_data,
within_threshold=within_thr,
threshold=threshold,
)
return trip

def generate_trip_coordinates(
ref_coords: Tuple[float, float],
points_list: list[float],
within_threshold: bool,
threshold: float,
max: float = 0.1 # approx. 10km in WGS84
threshold_meters: float,
) -> Tuple[float, float]:
"""generates trip coordinate data to use when mocking a set of trip data.
"""generates trip coordinate data to use when mocking a set of trip data.i
If the coordinate generated is to be binned together, it is generated in proximity of
the previous points in the points_list. Otherwise, if this point is not to be binned together,
keep generating a random trip unless we find one that would not bin with previously
accepeted trips.

:param ref_coords: reference coordinates to use as the center of the sampling circle
:param within_threshold: how many of these trips are within some distance threshold
:param threshold: the distance threshold, in WGS84
:param max: max distance, in WGS84, defaults to 0.1 (approx. 10km)
:param points_list: list of all the previoushlt selected points
:param within_threshold: is this point supposed to be within some distance threshold
:param threshold_meters: the distance threshold, in meters
:return: generated coordinate pairs sampled in a
circle from some coordinates up to some threshold
"""
angle = 2 * math.pi * random.random()
radius_threshold = threshold / 2
radius = random.uniform(0, radius_threshold) if within_threshold else random.uniform(radius_threshold, max)
x = radius * math.cos(angle) + ref_coords[0]
y = radius * math.sin(angle) + ref_coords[1]
return (x, y)

if within_threshold and points_list:
new_point = generate_nearby_random_points(random.choice(points_list), threshold_meters)
else:
new_point = generate_random_point()
while not all(calDistanceTest(new_point, pt) > threshold_meters for pt in points_list):
new_point = generate_random_point()
return new_point


def extract_trip_labels(trips: List[ecwc.Confirmedtrip]) -> Dict:
Expand Down Expand Up @@ -129,15 +239,12 @@ def build_mock_trip(
def generate_mock_trips(
user_id,
trips,
origin,
destination,
threshold,
trip_part='od',
label_data = None,
within_threshold = None,
start_ts: None = None,
end_ts: None = None,
threshold = 0.01,
max = 0.1,
has_label_p = 1.0,
seed = 0):
"""mocking function that generates multiple trips for a user. some are sampled
Expand All @@ -158,8 +265,6 @@ def generate_mock_trips(

:param user_id: user UUID
:param trips: number of trips
:param origin: origin coordinates
:param destination: destination coordinates
:param trip_part: when mock trips are generated, coordinates of this part of
the trips will be within the threshold. trip_part can take one
among the four values:
Expand All @@ -173,10 +278,8 @@ def generate_mock_trips(
will lie within the mentioned threshold when trips are generated)
:param label_data: dictionary of label data, see above, defaults to None
:param within_threshold: number of trips that should fall within the provided
distance threshold in degrees WGS84, defaults to None
:param threshold: distance threshold in WGS84 for sampling, defaults to 0.01
:param max: maximum distance beyond the threshold for trips sampled that
are not within the threshold, defaults to 0.1 degrees WGS84
distance threshold in m
:param threshold: distance threshold in WGS84 for sampling
:param has_label_p: probability a trip has labels, defaults to 1.0
:param seed: random seed, defaults to 0
:return: randomly sampled trips
Expand All @@ -186,9 +289,16 @@ def generate_mock_trips(
within = within_threshold if within_threshold is not None else trips
trips_within_threshold = [i < within for i in range(trips)]
result = []
origin_points=[]
destination_points=[]

# generate trip number of points based on which among 'o' ,'d' or 'od' should be in threshold
# proximity to each other.
for within in trips_within_threshold:
o = generate_trip_coordinates(origin, (trip_part[0] == 'o' and within), threshold, max)
d = generate_trip_coordinates(destination, (trip_part[1] == 'd' and within), threshold, max)
origin_points.append(generate_trip_coordinates(origin_points, (trip_part[0] == 'o' and within), threshold))
destination_points.append(generate_trip_coordinates(destination_points, (trip_part[1] == 'd' and within), threshold))

for o,d in zip(origin_points,destination_points):
labels = {} if label_data is None or random.random() > has_label_p \
else sample_trip_labels(
mode_labels=label_data.get('mode_confirm'),
Expand Down
68 changes: 0 additions & 68 deletions emission/tests/modellingTests/utilities.py
shankari marked this conversation as resolved.
Outdated
Show resolved Hide resolved

This file was deleted.