diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py index cb886670c..f98736048 100644 --- a/emission/tests/modellingTests/modellingTestAssets.py +++ b/emission/tests/modellingTests/modellingTestAssets.py @@ -1,35 +1,145 @@ import random from typing import Optional, Tuple, List, Dict from uuid import UUID -import emission.analysis.modelling.trip_model.trip_model as eamtm +import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg +import emission.tests.modellingTests.modellingTestAssets as etmm import emission.core.wrapper.confirmedtrip as ecwc import emission.core.wrapper.entry as ecwe import time import math +def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation): + """ + TODO: Write about each parameter to the function + pass in a test configuration to the binning algorithm. + + clustering_way : Part of the trip used for checking pairwise proximity. + Can take one of the three values: + + 1. 'origin' -> using origin of the trip to check if 2 points + lie within the mentioned similarity_threshold_meters + 2. 'destination' -> using destination of the trip to check if 2 points + lie within the mentioned similarity_threshold_meters + 3. 'origin-destination' -> both origin and destination of the trip to check + if 2 points lie within the mentioned + similarity_threshold_meters + """ + model_config = { + "metric": metric, + "similarity_threshold_meters": threshold, # meters, + "apply_cutoff": cutoff, + "clustering_way": clustering_way, + "incremental_evaluation": incrementalevaluation + } + + return eamtg.GreedySimilarityBinning(model_config) + +def generate_random_point(): + """Generate a completetly random point valid WGS84 latitiude and longtidude""" + lat=random.uniform(-90,90) + lon=random.uniform(-180,180) + return [lat,lon] + +def generate_nearby_random_points(ref_coords,threshold): + """ + Generate valid WGS84 latitiude and longtidude in threshold(m) proximity to + ref coordinates + """ + + thresholdInWGS84 = threshold* (0.000001/0.11) + dx=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2) + dy=random.uniform(-thresholdInWGS84/2,thresholdInWGS84/2) + return [ref_coords[0] +dx , ref_coords[1] +dy] + +def calDistanceTest(point1, point2, coordinates=False): + """haversine distance + + :param point1: a coordinate in degrees WGS84 + :param point2: another coordinate in degrees WGS84 + :param coordinates: if false, expect a list of coordinates, defaults to False + :return: distance approximately in meters + """ + earthRadius = 6371000 # meters + if coordinates: + dLat = math.radians(point1.lat-point2.lat) + dLon = math.radians(point1.lon-point2.lon) + lat1 = math.radians(point1.lat) + lat2 = math.radians(point2.lat) + else: + dLat = math.radians(point1[1]-point2[1]) + dLon = math.radians(point1[0]-point2[0]) + lat1 = math.radians(point1[1]) + lat2 = math.radians(point2[1]) + + + a = (math.sin(dLat/2) ** 2) + ((math.sin(dLon/2) ** 2) * math.cos(lat1) * math.cos(lat2)) + c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a)) + d = earthRadius * c + + return d + +def setTripConfig(trips,trip_part,threshold,within_thr=None,label_data=None): + """ + TODO: Write about each parameter to the function + trip_part: when mock trips are generated, coordinates of this part of + m trips will be within the threshold. trip_part can take one + among the four values: + + 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie + within the mentioned threshold when trips are generated), + + 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned + threshold when trips are generated), + + 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the + mentioned threshold when trips are generated) + + 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips + will lie within the mentioned threshold when trips are generated) + """ + if label_data == None: + label_data = { + "mode_confirm": ['walk', 'bike', 'transit'], + "purpose_confirm": ['work', 'home', 'school'], + "replaced_mode": ['drive'] + } + + trip =etmm.generate_mock_trips( + user_id="joe", + trips=trips, + trip_part=trip_part, + label_data=label_data, + within_threshold=within_thr, + threshold=threshold, + ) + return trip def generate_trip_coordinates( - ref_coords: Tuple[float, float], + points_list: list[float], within_threshold: bool, - threshold: float, - max: float = 0.1 # approx. 10km in WGS84 + threshold_meters: float, ) -> Tuple[float, float]: - """generates trip coordinate data to use when mocking a set of trip data. + """generates trip coordinate data to use when mocking a set of trip data.i + If the coordinate generated is to be binned together, it is generated in proximity of + the previous points in the points_list. Otherwise, if this point is not to be binned together, + keep generating a random trip unless we find one that would not bin with previously + accepeted trips. - :param ref_coords: reference coordinates to use as the center of the sampling circle - :param within_threshold: how many of these trips are within some distance threshold - :param threshold: the distance threshold, in WGS84 - :param max: max distance, in WGS84, defaults to 0.1 (approx. 10km) + :param points_list: list of all the previoushlt selected points + :param within_threshold: is this point supposed to be within some distance threshold + :param threshold_meters: the distance threshold, in meters :return: generated coordinate pairs sampled in a circle from some coordinates up to some threshold """ - angle = 2 * math.pi * random.random() - radius_threshold = threshold / 2 - radius = random.uniform(0, radius_threshold) if within_threshold else random.uniform(radius_threshold, max) - x = radius * math.cos(angle) + ref_coords[0] - y = radius * math.sin(angle) + ref_coords[1] - return (x, y) + + if within_threshold and points_list: + new_point = generate_nearby_random_points(random.choice(points_list), threshold_meters) + else: + new_point = generate_random_point() + while not all(calDistanceTest(new_point, pt) > threshold_meters for pt in points_list): + new_point = generate_random_point() + return new_point def extract_trip_labels(trips: List[ecwc.Confirmedtrip]) -> Dict: @@ -129,15 +239,12 @@ def build_mock_trip( def generate_mock_trips( user_id, trips, - origin, - destination, + threshold, trip_part='od', label_data = None, within_threshold = None, start_ts: None = None, end_ts: None = None, - threshold = 0.01, - max = 0.1, has_label_p = 1.0, seed = 0): """mocking function that generates multiple trips for a user. some are sampled @@ -158,8 +265,6 @@ def generate_mock_trips( :param user_id: user UUID :param trips: number of trips - :param origin: origin coordinates - :param destination: destination coordinates :param trip_part: when mock trips are generated, coordinates of this part of the trips will be within the threshold. trip_part can take one among the four values: @@ -173,10 +278,8 @@ def generate_mock_trips( will lie within the mentioned threshold when trips are generated) :param label_data: dictionary of label data, see above, defaults to None :param within_threshold: number of trips that should fall within the provided - distance threshold in degrees WGS84, defaults to None - :param threshold: distance threshold in WGS84 for sampling, defaults to 0.01 - :param max: maximum distance beyond the threshold for trips sampled that - are not within the threshold, defaults to 0.1 degrees WGS84 + distance threshold in m + :param threshold: distance threshold in WGS84 for sampling :param has_label_p: probability a trip has labels, defaults to 1.0 :param seed: random seed, defaults to 0 :return: randomly sampled trips @@ -186,9 +289,16 @@ def generate_mock_trips( within = within_threshold if within_threshold is not None else trips trips_within_threshold = [i < within for i in range(trips)] result = [] + origin_points=[] + destination_points=[] + + # generate trip number of points based on which among 'o' ,'d' or 'od' should be in threshold + # proximity to each other. for within in trips_within_threshold: - o = generate_trip_coordinates(origin, (trip_part[0] == 'o' and within), threshold, max) - d = generate_trip_coordinates(destination, (trip_part[1] == 'd' and within), threshold, max) + origin_points.append(generate_trip_coordinates(origin_points, (trip_part[0] == 'o' and within), threshold)) + destination_points.append(generate_trip_coordinates(destination_points, (trip_part[1] == 'd' and within), threshold)) + + for o,d in zip(origin_points,destination_points): labels = {} if label_data is None or random.random() > has_label_p \ else sample_trip_labels( mode_labels=label_data.get('mode_confirm'), diff --git a/emission/tests/modellingTests/utilities.py b/emission/tests/modellingTests/utilities.py deleted file mode 100644 index 9f03358bb..000000000 --- a/emission/tests/modellingTests/utilities.py +++ /dev/null @@ -1,68 +0,0 @@ -import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg -import emission.tests.modellingTests.modellingTestAssets as etmm - -def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation): - """ - TODO : tell about each param. - pass in a test configuration to the binning algorithm. - - clustering_way : Part of the trip used for checking pairwise proximity. - Can take one of the three values: - - 1. 'origin' -> using origin of the trip to check if 2 points - lie within the mentioned similarity_threshold_meters - 2. 'destination' -> using destination of the trip to check if 2 points - lie within the mentioned similarity_threshold_meters - 3. 'origin-destination' -> both origin and destination of the trip to check - if 2 points lie within the mentioned - similarity_threshold_meters - """ - model_config = { - "metric": metric, - "similarity_threshold_meters": threshold, # meters, - "apply_cutoff": cutoff, - "clustering_way": clustering_way, - "incremental_evaluation": incrementalevaluation - } - - return eamtg.GreedySimilarityBinning(model_config) - - -def setTripConfig(trips,org,dest,trip_part,within_thr=None,label_data=None,threshold=0.001): - """ - TODO: Tell about each - trip_part: when mock trips are generated, coordinates of this part of - m trips will be within the threshold. trip_part can take one - among the four values: - - 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie - within the mentioned threshold when trips are generated), - - 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned - threshold when trips are generated), - - 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the - mentioned threshold when trips are generated) - - 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips - will lie within the mentioned threshold when trips are generated) - """ - if label_data == None: - label_data = { - "mode_confirm": ['walk', 'bike', 'transit'], - "purpose_confirm": ['work', 'home', 'school'], - "replaced_mode": ['drive'] - } - - trip =etmm.generate_mock_trips( - user_id="joe", - trips=trips, - origin=org, - destination=dest, - trip_part=trip_part, - label_data=label_data, - within_threshold=within_thr, - threshold=threshold, - ) - return trip - \ No newline at end of file