Skip to content

Commit

Permalink
Changed logic for Random trip genration
Browse files Browse the repository at this point in the history
Random trips are now generated like this :

if  certain trips is are to be binned together ( by 'o','d' or 'od' or '__' (meaning NONE)) they are generated in proximity of the previous in-bin trip.  Otherwise, if they are not to be binned together, we keep generating a random trip unless we find one that would not bin with previously accepted trips.
  • Loading branch information
humbleOldSage committed Sep 8, 2023
1 parent 7f6d754 commit c35b7c1
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 95 deletions.
164 changes: 137 additions & 27 deletions emission/tests/modellingTests/modellingTestAssets.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,145 @@
import random
from typing import Optional, Tuple, List, Dict
from uuid import UUID
import emission.analysis.modelling.trip_model.trip_model as eamtm
import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
import emission.tests.modellingTests.modellingTestAssets as etmm
import emission.core.wrapper.confirmedtrip as ecwc

import emission.core.wrapper.entry as ecwe
import time
import math

def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
"""
TODO: Write about each parameter to the function
pass in a test configuration to the binning algorithm.
clustering_way : Part of the trip used for checking pairwise proximity.
Can take one of the three values:
1. 'origin' -> using origin of the trip to check if 2 points
lie within the mentioned similarity_threshold_meters
2. 'destination' -> using destination of the trip to check if 2 points
lie within the mentioned similarity_threshold_meters
3. 'origin-destination' -> both origin and destination of the trip to check
if 2 points lie within the mentioned
similarity_threshold_meters
"""
model_config = {
"metric": metric,
"similarity_threshold_meters": threshold, # meters,
"apply_cutoff": cutoff,
"clustering_way": clustering_way,
"incremental_evaluation": incrementalevaluation
}

return eamtg.GreedySimilarityBinning(model_config)

def generate_random_point():
"""Generate a completetly random point valid WGS84 latitiude and longtidude"""
lat=random.uniform(-90,90)
lon=random.uniform(-180,180)
return [lat,lon]

def generate_nearby_random_points(ref_coords,threshold):
"""
Generate valid WGS84 latitiude and longtidude in threshold(m) proximity to
ref coordinates
"""

thresholdInWGS84 = threshold* (0.000001/0.11)
dx=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
dy=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2)
return [ref_coords[0] +dx , ref_coords[1] +dy]

def calDistanceTest(point1, point2, coordinates=False):
"""haversine distance
:param point1: a coordinate in degrees WGS84
:param point2: another coordinate in degrees WGS84
:param coordinates: if false, expect a list of coordinates, defaults to False
:return: distance approximately in meters
"""
earthRadius = 6371000 # meters
if coordinates:
dLat = math.radians(point1.lat-point2.lat)
dLon = math.radians(point1.lon-point2.lon)
lat1 = math.radians(point1.lat)
lat2 = math.radians(point2.lat)
else:
dLat = math.radians(point1[1]-point2[1])
dLon = math.radians(point1[0]-point2[0])
lat1 = math.radians(point1[1])
lat2 = math.radians(point2[1])


a = (math.sin(dLat/2) ** 2) + ((math.sin(dLon/2) ** 2) * math.cos(lat1) * math.cos(lat2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
d = earthRadius * c

return d

def setTripConfig(trips,trip_part,threshold,within_thr=None,label_data=None):
"""
TODO: Write about each parameter to the function
trip_part: when mock trips are generated, coordinates of this part of
m trips will be within the threshold. trip_part can take one
among the four values:
1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie
within the mentioned threshold when trips are generated),
2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned
threshold when trips are generated),
3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the
mentioned threshold when trips are generated)
4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
will lie within the mentioned threshold when trips are generated)
"""
if label_data == None:
label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}

trip =etmm.generate_mock_trips(
user_id="joe",
trips=trips,
trip_part=trip_part,
label_data=label_data,
within_threshold=within_thr,
threshold=threshold,
)
return trip

def generate_trip_coordinates(
ref_coords: Tuple[float, float],
points_list: list[float],
within_threshold: bool,
threshold: float,
max: float = 0.1 # approx. 10km in WGS84
threshold_meters: float,
) -> Tuple[float, float]:
"""generates trip coordinate data to use when mocking a set of trip data.
"""generates trip coordinate data to use when mocking a set of trip data.i
If the coordinate generated is to be binned together, it is generated in proximity of
the previous points in the points_list. Otherwise, if this point is not to be binned together,
keep generating a random trip unless we find one that would not bin with previously
accepeted trips.
:param ref_coords: reference coordinates to use as the center of the sampling circle
:param within_threshold: how many of these trips are within some distance threshold
:param threshold: the distance threshold, in WGS84
:param max: max distance, in WGS84, defaults to 0.1 (approx. 10km)
:param points_list: list of all the previoushlt selected points
:param within_threshold: is this point supposed to be within some distance threshold
:param threshold_meters: the distance threshold, in meters
:return: generated coordinate pairs sampled in a
circle from some coordinates up to some threshold
"""
angle = 2 * math.pi * random.random()
radius_threshold = threshold / 2
radius = random.uniform(0, radius_threshold) if within_threshold else random.uniform(radius_threshold, max)
x = radius * math.cos(angle) + ref_coords[0]
y = radius * math.sin(angle) + ref_coords[1]
return (x, y)

if within_threshold and points_list:
new_point = generate_nearby_random_points(random.choice(points_list), threshold_meters)
else:
new_point = generate_random_point()
while not all(calDistanceTest(new_point, pt) > threshold_meters for pt in points_list):
new_point = generate_random_point()
return new_point


def extract_trip_labels(trips: List[ecwc.Confirmedtrip]) -> Dict:
Expand Down Expand Up @@ -129,15 +239,12 @@ def build_mock_trip(
def generate_mock_trips(
user_id,
trips,
origin,
destination,
threshold,
trip_part='od',
label_data = None,
within_threshold = None,
start_ts: None = None,
end_ts: None = None,
threshold = 0.01,
max = 0.1,
has_label_p = 1.0,
seed = 0):
"""mocking function that generates multiple trips for a user. some are sampled
Expand All @@ -158,8 +265,6 @@ def generate_mock_trips(
:param user_id: user UUID
:param trips: number of trips
:param origin: origin coordinates
:param destination: destination coordinates
:param trip_part: when mock trips are generated, coordinates of this part of
the trips will be within the threshold. trip_part can take one
among the four values:
Expand All @@ -173,10 +278,8 @@ def generate_mock_trips(
will lie within the mentioned threshold when trips are generated)
:param label_data: dictionary of label data, see above, defaults to None
:param within_threshold: number of trips that should fall within the provided
distance threshold in degrees WGS84, defaults to None
:param threshold: distance threshold in WGS84 for sampling, defaults to 0.01
:param max: maximum distance beyond the threshold for trips sampled that
are not within the threshold, defaults to 0.1 degrees WGS84
distance threshold in m
:param threshold: distance threshold in WGS84 for sampling
:param has_label_p: probability a trip has labels, defaults to 1.0
:param seed: random seed, defaults to 0
:return: randomly sampled trips
Expand All @@ -186,9 +289,16 @@ def generate_mock_trips(
within = within_threshold if within_threshold is not None else trips
trips_within_threshold = [i < within for i in range(trips)]
result = []
origin_points=[]
destination_points=[]

# generate trip number of points based on which among 'o' ,'d' or 'od' should be in threshold
# proximity to each other.
for within in trips_within_threshold:
o = generate_trip_coordinates(origin, (trip_part[0] == 'o' and within), threshold, max)
d = generate_trip_coordinates(destination, (trip_part[1] == 'd' and within), threshold, max)
origin_points.append(generate_trip_coordinates(origin_points, (trip_part[0] == 'o' and within), threshold))
destination_points.append(generate_trip_coordinates(destination_points, (trip_part[1] == 'd' and within), threshold))

for o,d in zip(origin_points,destination_points):
labels = {} if label_data is None or random.random() > has_label_p \
else sample_trip_labels(
mode_labels=label_data.get('mode_confirm'),
Expand Down
68 changes: 0 additions & 68 deletions emission/tests/modellingTests/utilities.py

This file was deleted.

0 comments on commit c35b7c1

Please sign in to comment.