From 7f6d7542b5561861e5edfd0e27435cb11564078a Mon Sep 17 00:00:00 2001 From: $aTyam Date: Tue, 5 Sep 2023 16:02:28 -0400 Subject: [PATCH] Correcting logic behind binning and non-binning while testing 1. improved logic based on this comment . https://github.com/e-mission/e-mission-server/pull/933/commits/710d1a5791212b540b883dedd5613a562071edc9#r1314065502 2.Created a utilities file for repetitive code required by multiple files. 3. clustering threshold back to 500 4. More in-code comments. --- .../TestGreedySimilarityBinning.py | 505 ++---------------- .../modellingTests/TestSimilarityMetric.py | 127 ++--- emission/tests/modellingTests/utilities.py | 68 +++ 3 files changed, 165 insertions(+), 535 deletions(-) create mode 100644 emission/tests/modellingTests/utilities.py diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py index 620f2cf99..3e1cd78c2 100644 --- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py +++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py @@ -1,6 +1,6 @@ import unittest import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg -import emission.tests.modellingTests.modellingTestAssets as etmm +import emission.tests.modellingTests.utilities as etmu import logging @@ -15,428 +15,60 @@ def testNoBinning(self): Tests the three (origin, destination and origin-destination based) binning configuration for trips. - When both the origin and destination points of trips are outside a threshold + When the origin and destination points of trips are outside a threshold limit, none of the trips should be binned with the other in any of the three configs (origin, destination or origin-and-destination based). """ - label_data = { - "mode_confirm": ['walk', 'bike', 'transit'], - "purpose_confirm": ['work', 'home', 'school'], - "replaced_mode": ['drive'] - } - - # generate $n trips. 
$m of them should have origin and destinations sampled - # within a radius that should have them binned. - n = 20 - m = 5 - - # trip_part: when mock trips are generated, coordinates of this part of - # m trips will be within the threshold. trip_part can take one - # among the four values: - # - # 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie - # within the mentioned threshold when trips are generated), - # - # 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned - # threshold when trips are generated), - # - # 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the - # mentioned threshold when trips are generated) - # - # 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips - # will lie within the mentioned threshold when trips are generated) - - trips = etmm.generate_mock_trips( - user_id="joe", - trips=n, - origin=(0, 0), - destination=(1, 1), - trip_part='__', - label_data=label_data, - within_threshold=m, - threshold=0.001, # ~ 111 meters in degrees WGS84 - ) - - - # pass in a test configuration to the binning algorithm. - # - # clustering_way : Part of the trip used for checking pairwise proximity. - # Can take one of the three values: - # - # 1. 'origin' -> using origin of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 2. 'destination' -> using destination of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 3. 
'origin-destination' -> both origin and destination of the trip to check - # if 2 points lie within the mentioned - # similarity_threshold_meters - - model1_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin", - "incremental_evaluation": False - } - model1 = eamtg.GreedySimilarityBinning(model1_config) - model1.fit(trips) - - - model2_config = { - "metric": "od_similarity", - "similarity_threshold_meters":111, # meters, - "apply_cutoff": False, - "clustering_way": "destination", - "incremental_evaluation": False - } - model2 = eamtg.GreedySimilarityBinning(model2_config) - model2.fit(trips) - - - model3_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin-destination", - "incremental_evaluation": False - } - model3 = eamtg.GreedySimilarityBinning(model3_config) - model3.fit(trips) - - # Since neither the origin nor the destination of the points generated lie - # within the threshold, there should be no binning at all. All the bins should - # have size 1. - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model2.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - - - def testBinningByOrigin(self): - """ - Tests the 'origin' based binning method for trips. - - When only the origin points of trips are within a threshold - limit, trips must be binned together that too if binned based on - 'origins', otherwise no binning. 
- """ - - label_data = { - "mode_confirm": ['walk', 'bike', 'transit'], - "purpose_confirm": ['work', 'home', 'school'], - "replaced_mode": ['drive'] - } - - # generate $n trips. $m of them should have origin and destinations sampled - # within a radius that should have them binned. - n = 20 - m = 5 - - # trip_part: when mock trips are generated, coordinates of this part of - # m trips will be within the threshold. trip_part can take one - # among the four values: - # - # 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie - # within the mentioned threshold when trips are generated), - # - # 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned - # threshold when trips are generated), - # - # 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the - # mentioned threshold when trips are generated) - # - # 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips - # will lie within the mentioned threshold when trips are generated) + # generate $n trips. + n = 20 - trips = etmm.generate_mock_trips( - user_id="joe", - trips=n, - origin=(0, 0), - destination=(1, 1), - trip_part='o_', - label_data=label_data, - within_threshold=m, - threshold=0.001, # ~ 111 meters in degrees WGS84 - ) + #this generates 20 trips one-by-one, where each trip's respective origin and destination + # points are more than 500m away. + trips = [ etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)] - # pass in a test configuration to the binning algorithm. - # - # clustering_way : Part of the trip used for checking pairwise proximity. - # Can take one of the three values: - # - # 1. 'origin' -> using origin of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 2. 'destination' -> using destination of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 3. 
'origin-destination' -> both origin and destination of the trip to check - # if 2 points lie within the mentioned - # similarity_threshold_meters + # parameters passed for testing. A list, where each element is one way of clustering + clustering_ways_paramters= ["origin","destination","origin-destination"] - model1_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin", - "incremental_evaluation": False - } - model1 = eamtg.GreedySimilarityBinning(model1_config) - model1.fit(trips) - - - model2_config = { - "metric": "od_similarity", - "similarity_threshold_meters":111, # meters, - "apply_cutoff": False, - "clustering_way": "destination", - "incremental_evaluation": False - } - model2 = eamtg.GreedySimilarityBinning(model2_config) - model2.fit(trips) - - - model3_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin-destination", - "incremental_evaluation": False - } - model3 = eamtg.GreedySimilarityBinning(model3_config) - model3.fit(trips) - - - # Since only the origin of the points generated lies within the threshold, - # there should be binning only when 'origin' config is used. Otherwise all - # the bins should have size 1. 
- - at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values())) - self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it") - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) ==1, model2.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - - - def testBinningByDestination(self): - """ - Tests the 'destination' based binning method for trips. - - When only the destination points of trips are within a threshold - limit, trips must be binned together that too if binned based on - 'destination', otherwise no binning. + #Testing each of the three clustering_ways by passing them as parameters + for cw in clustering_ways_paramters: + with self.subTest(clustering_way=cw): + #initialise the binning model and fit with previously generated trips + model = etmu.setModelConfig("od_similarity", 500, False, cw, False) + model.fit(trips) + #check each bins for no of trips + no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values())) + #Since all trips were sampled outside the threshold, there should be no bin + # with more then 1 trip + self.assertTrue(no_large_bin,"no bin should have more than 1 features in it") + + def testBinning(self): """ + Tests the three (origin, destination and origin-destination based) + binning configuration for trips. - label_data = { - "mode_confirm": ['walk', 'bike', 'transit'], - "purpose_confirm": ['work', 'home', 'school'], - "replaced_mode": ['drive'] - } - - # generate $n trips. $m of them should have origin and destinations sampled - # within a radius that should have them binned. 
- n = 20 - m = 5 - - # trip_part: when mock trips are generated, coordinates of this part of - # m trips will be within the threshold. trip_part can take one - # among the four values: - # - # 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie - # within the mentioned threshold when trips are generated), - # - # 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned - # threshold when trips are generated), - # - # 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the - # mentioned threshold when trips are generated) - # - # 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips - # will lie within the mentioned threshold when trips are generated) - - trips = etmm.generate_mock_trips( - user_id="joe", - trips=n, - origin=(0, 0), - destination=(1, 1), - trip_part='_d', - label_data=label_data, - within_threshold=m, - threshold=0.001, # ~ 111 meters in degrees WGS84 - ) - - # pass in a test configuration to the binning algorithm. - # - # clustering_way : Part of the trip used for checking pairwise proximity. - # Can take one of the three values: - # - # 1. 'origin' -> using origin of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 2. 'destination' -> using destination of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 3. 
'origin-destination' -> both origin and destination of the trip to check - # if 2 points lie within the mentioned - # similarity_threshold_meters - - model1_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin", - "incremental_evaluation": False - } - model1 = eamtg.GreedySimilarityBinning(model1_config) - model1.fit(trips) - - - model2_config = { - "metric": "od_similarity", - "similarity_threshold_meters":111, # meters, - "apply_cutoff": False, - "clustering_way": "destination", - "incremental_evaluation": False - } - model2 = eamtg.GreedySimilarityBinning(model2_config) - model2.fit(trips) - - - model3_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin-destination", - "incremental_evaluation": False - } - model3 = eamtg.GreedySimilarityBinning(model3_config) - model3.fit(trips) - - # Since only the destination of the points generated lies within the threshold, - # there should be binning only when 'destination' config is used. Otherwise all - # the bins should have size 1. - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values())) - self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it") - - at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - - def testBinningByOriginAndDestination(self): + When the points lie within threshold ,the trips are binned together. """ - Tests the 'origin-destination' based binning method for trips. 
- - When both the origin and destination points of trips are within - a threshold limit, trips will be binned together in all three (origin , - destination, origin-and-destinaiton) configurations. - """ - - label_data = { - "mode_confirm": ['walk', 'bike', 'transit'], - "purpose_confirm": ['work', 'home', 'school'], - "replaced_mode": ['drive'] - } - - # generate $n trips. $m of them should have origin and destinations sampled + # generate $n trips. $m of them should have origin sampled # within a radius that should have them binned. n = 20 m = 5 - # trip_part: when mock trips are generated, coordinates of this part of - # m trips will be within the threshold. trip_part can take one - # among the four values: - # - # 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie - # within the mentioned threshold when trips are generated), - # - # 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned - # threshold when trips are generated), - # - # 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the - # mentioned threshold when trips are generated) - # - # 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips - # will lie within the mentioned threshold when trips are generated) - - trips = etmm.generate_mock_trips( - user_id="joe", - trips=n, - origin=(0, 0), - destination=(1, 1), - trip_part='od', - label_data=label_data, - within_threshold=m, - threshold=0.001, # ~ 111 meters in degrees WGS84 - ) - - # pass in a test configuration to the binning algorithm. - # - # clustering_way : Part of the trip used for checking pairwise proximity. - # Can take one of the three values: - # - # 1. 'origin' -> using origin of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 2. 'destination' -> using destination of the trip to check if 2 points - # lie within the mentioned similarity_threshold_meters - # 3. 
'origin-destination' -> both origin and destination of the trip to check - # if 2 points lie within the mentioned - # similarity_threshold_meters - - model1_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin", - "incremental_evaluation": False - } - model1 = eamtg.GreedySimilarityBinning(model1_config) - model1.fit(trips) - - - model2_config = { - "metric": "od_similarity", - "similarity_threshold_meters":111, # meters, - "apply_cutoff": False, - "clustering_way": "destination", - "incremental_evaluation": False - } - model2 = eamtg.GreedySimilarityBinning(model2_config) - model2.fit(trips) - - - model3_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 111, # meters, - "apply_cutoff": False, - "clustering_way": "origin-destination", - "incremental_evaluation": False - } - model3 = eamtg.GreedySimilarityBinning(model3_config) - model3.fit(trips) - - # Since both the origin and the destination points of the generated trips lie - # within the threshold, there should be binning in all three configs. - - at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values())) - self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it") - - at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - - at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model3.bins.values())) - self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it") - + # parameters passed for testing. 
A list, where each element of this list takes the form + # [trip part to be sampled within mentioned threshold , clustering way used to check similarity] + parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']] + for tp,cw in parameters: + with self.subTest(trip_part=tp,clustering_way=cw): + #generate random trips using utilities + trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1), + trip_part=tp, within_thr=m) + #initialise the binning model and fit with previously generated trips + model = etmu.setModelConfig("od_similarity", 500, False, cw, False) + model.fit(trips) + #check each bins for no of trips + at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values())) + #Since 5 trips were sampled within the threshold, there should be one bin with 5 trips + self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it") def testPrediction(self): """ @@ -449,24 +81,10 @@ def testPrediction(self): } n = 6 - trips = etmm.generate_mock_trips( - user_id="joe", - trips=n, - origin=(0, 0), - destination=(1, 1), - trip_part='od', - label_data=label_data, - threshold=0.001, # ~ 111 meters in degrees WGS84 + trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1), + trip_part='od', label_data=label_data, ) - - model_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 500, # meters, - "apply_cutoff": False, - "clustering_way": "origin-destination", - "incremental_evaluation": False - } - model = eamtg.GreedySimilarityBinning(model_config) + model = etmu.setModelConfig("od_similarity", 500, False, "origin-destination", False) train = trips[0:5] test = trips[5] @@ -486,36 +104,17 @@ def testNoPrediction(self): "purpose_confirm": ['pizza_party'], "replaced_mode": ['crabwalking'] } - n = 5 - train = etmm.generate_mock_trips( - user_id="joe", - trips=n, - origin=(39.7645187, -104.9951944), # Denver, CO - destination=(39.7435206, -105.2369292), # Golden, CO - 
trip_part='od', - label_data=label_data, - threshold=0.001, # ~ 111 meters in degrees WGS84 + + train = etmu.setTripConfig(trips=n, org=(39.7645187, -104.9951944), # Denver, CO + dest=(39.7435206, -105.2369292), # Golden, CO + trip_part='od', label_data=label_data ) - test = etmm.generate_mock_trips( - user_id="joe", - trips=1, - origin=(61.1042262, -150.5611644), # Anchorage, AK - destination=(62.2721466, -150.3233046), # Talkeetna, AK - trip_part='od', - label_data=label_data, - threshold=0.001, # ~ 111 meters in degrees WGS84 + test = etmu.setTripConfig(trips=n, org=(61.1042262, -150.5611644), # Anchorage, AK + dest=(62.2721466, -150.3233046), # Talkeetna, AK + trip_part='od', label_data=label_data, ) - - model_config = { - "metric": "od_similarity", - "similarity_threshold_meters": 500, # meters, - "apply_cutoff": False, - "clustering_way": "origin-destination", - "incremental_evaluation": False - } - model = eamtg.GreedySimilarityBinning(model_config) - + model = etmu.setModelConfig("od_similarity", 500, False, "origin-destination", False) model.fit(train) results, n = model.predict(test[0]) diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py index f7c7b195f..cbe500b23 100644 --- a/emission/tests/modellingTests/TestSimilarityMetric.py +++ b/emission/tests/modellingTests/TestSimilarityMetric.py @@ -1,98 +1,61 @@ import unittest -import emission.tests.modellingTests.modellingTestAssets as etmm import emission.analysis.modelling.similarity.od_similarity as eamso +import emission.tests.modellingTests.utilities as etmu class TestSimilarityMetric(unittest.TestCase): def testODsAreSimilar(self): generate_points_thresh = 0.001 # approx. 
111 meters - similarity_threshold = 111 # - + similarity_threshold = 500 # in meters metric = eamso.OriginDestinationSimilarity() - ## Sub-Test 1 - 3 : - # random, but, origin and destination points are sampled within a circle and should always be < sim threshold - # Since both origin and destination poitns lie within threshold limits,they should be similar - # when we check by just origin or just destination or both origin-and-destination - - trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], 'od',threshold=generate_points_thresh) - coords0 = metric.extract_features(trips[0]) - coords1 = metric.extract_features(trips[1]) - similarOD1 = metric.similar(coords0, coords1, similarity_threshold) - similarOD2 = metric.similar(coords0[:2], coords1[:2], similarity_threshold) - similarOD3 = metric.similar(coords0[2:], coords1[2:], similarity_threshold) - - ## Sub-Test 4 : - # random, but, only origin points are sampled within a circle and should always be < sim threshold - # Since origin of two points lies within threshold limits,they should be similar - # when we check just origin for similarity. - - - trips = etmm.generate_mock_trips('alice', 2, [0, 0], [1, 1], 'o_',threshold=generate_points_thresh) - coords0 = metric.extract_features(trips[0])[:2] - coords1 = metric.extract_features(trips[1])[:2] - similarO = metric.similar(coords0, coords1, similarity_threshold) - - ##Sub-Test 5 : - # random, but, only destination points are sampled within a circle and should always be < sim threshold - # Since destination of two points lies within threshold limits,they should be similar - # when we check just destination for similarity. 
- - trips = etmm.generate_mock_trips('Caty', 2, [0, 0], [1, 1], '_d',threshold=generate_points_thresh) - coords0 = metric.extract_features(trips[0])[2:] - coords1 = metric.extract_features(trips[1])[2:] - similarD = metric.similar(coords0, coords1, similarity_threshold) - # All the similars must be true - self.assertTrue(similarOD1) # RESULT SUB-TEST 1 - self.assertTrue(similarOD2) # RESULT SUB-TEST 2 - self.assertTrue(similarOD3) # RESULT SUB-TEST 3 - self.assertTrue(similarO) # RESULT SUB-TEST 4 - self.assertTrue(similarD) # RESULT SUB-TEST 5 + # parameters passed for testing are set here. A list, where each element of this list takes the form + # [trip part to be sampled within mentioned threshold, (start_coord,end_coord)] + # Since the extract_features function returns a list of the form [origin_lat,origin_long,destination_lat,destination_long], + # if clustering is to be done by : + # a.origin, we pass the first two values of this list,i.e. from 0 till before 2 index + # b.destination, we pass the last two values of this list,i.e. from 2 till before 4 index + # c.origin-destination, we pass the entire list , i.e. 
from 0 till before 4 index + parameters= [["od",(0,4)],["_d",(2,4)],["o_",(0,2)]] + + for tp,(coord_start,coord_end) in parameters: + with self.subTest(trip_part=tp): + #generate 2 trips with parameter values + trips = etmu.setTripConfig(2, [0, 0], [1, 1], trip_part=tp,threshold=generate_points_thresh) + # depending on the parameters, extract the relevant coordinates + trip0_coords = metric.extract_features(trips[0])[coord_start:coord_end] + trip1_coords = metric.extract_features(trips[1])[coord_start:coord_end] + #check for similarity using relevant coordinates + similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold) + # Since both origin and destination points lie within threshold limits,they should be similar + # when we check by just origin or just destination or both origin-and-destination + self.assertTrue(similarOD) def testODsAreNotSimilar(self): - generate_points_thresh = 0.001 # approx. 111 meters - similarity_threshold = 111 # - metric = eamso.OriginDestinationSimilarity() - - ## Sub-Test 1-2: - # Two trips with neither origin nor destination coordinates within threshold - # must not be similar in any configuration of similarity testing. - trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], '__', threshold=generate_points_thresh) - coords0 = metric.extract_features(trips[0]) - coords1 = metric.extract_features(trips[1]) - similar11 = metric.similar(coords0[:2], coords1[:2], similarity_threshold) - similar12 = metric.similar(coords0[2:], coords1[:], similarity_threshold) - - ## Sub-Test 3-4: - # Two trips with origin coordinates within threshold but we check - # similarity using destination coordinates or origin-and-destination - # should not be similar. 
- trips = etmm.generate_mock_trips('Alice', 2, [2, 2], [3, 3], 'o_', threshold=generate_points_thresh) + similarity_threshold = 500 metric = eamso.OriginDestinationSimilarity() - coords0 = metric.extract_features(trips[0]) - coords1 = metric.extract_features(trips[1]) - similar21 = metric.similar(coords0[2:], coords1[2:], similarity_threshold) - similar22 = metric.similar(coords0, coords1, similarity_threshold) - - ## Sub-Test 5-6: - # Two trips with destination coordinates within threshold but we check - # similarity using origin coordinates or origin-and-destination - # should not be similar. - trips = etmm.generate_mock_trips('Caty', 2, [3, 3], [4, 4], '_d', threshold=generate_points_thresh) - metric = eamso.OriginDestinationSimilarity() - coords0 = metric.extract_features(trips[0]) - coords1 = metric.extract_features(trips[1]) - similar31 = metric.similar(coords0[:2], coords1[:2], similarity_threshold) - similar32 = metric.similar(coords0, coords1, similarity_threshold) - - # All the similars must be False - self.assertFalse(similar11) # RESULT SUB-TEST 1 - self.assertFalse(similar12) # RESULT SUB-TEST 2 - self.assertFalse(similar21) # RESULT SUB-TEST 3 - self.assertFalse(similar22) # RESULT SUB-TEST 4 - self.assertFalse(similar31) # RESULT SUB-TEST 5 - self.assertFalse(similar32) # RESULT SUB-TEST 6 + # parameters passed for testing are set. A list, where each element of this list takes the form + # [(start_coord,end_coord)] + # Since the extract_features function returns a list of the form [origin_lat,origin_long,destination_lat,destination_long], + # if clustering shouldn't happen, then + # a.origin, we pass the first two values of this list,i.e. from 0 till before 2 index + # b.destination, we pass the last two values of this list,i.e. from 2 till before 4 index + # c.origin-destination, we pass the entire list , i.e. 
from 0 till before 4 index + parameters= [(0,2),(2,4),[0,4]] + n=2 + #this generates 2 trips one-by-one, where each trip's respective origin and destination + # points are more than 500m away. + trips = [etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)] + trip0_coord = metric.extract_features(trips[0]) + trip1_coord = metric.extract_features(trips[1]) + + for (coord_start,coord_end) in parameters: + with self.subTest(coordinates=(coord_start,coord_end)): + IsSimilar = metric.similar(trip0_coord[coord_start:coord_end],trip1_coord[coord_start:coord_end], similarity_threshold) + # Two trips with neither origin nor destination coordinates within the threshold + # must not be similar by any configuration of similarity testing. + self.assertFalse(IsSimilar) if __name__ == '__main__': unittest.main() diff --git a/emission/tests/modellingTests/utilities.py b/emission/tests/modellingTests/utilities.py new file mode 100644 index 000000000..9f03358bb --- /dev/null +++ b/emission/tests/modellingTests/utilities.py @@ -0,0 +1,68 @@ +import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg +import emission.tests.modellingTests.modellingTestAssets as etmm + +def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation): + """ + TODO : tell about each param. + pass in a test configuration to the binning algorithm. + + clustering_way : Part of the trip used for checking pairwise proximity. + Can take one of the three values: + + 1. 'origin' -> using origin of the trip to check if 2 points + lie within the mentioned similarity_threshold_meters + 2. 'destination' -> using destination of the trip to check if 2 points + lie within the mentioned similarity_threshold_meters + 3. 
'origin-destination' -> both origin and destination of the trip to check + if 2 points lie within the mentioned + similarity_threshold_meters + """ + model_config = { + "metric": metric, + "similarity_threshold_meters": threshold, # meters, + "apply_cutoff": cutoff, + "clustering_way": clustering_way, + "incremental_evaluation": incrementalevaluation + } + + return eamtg.GreedySimilarityBinning(model_config) + + +def setTripConfig(trips,org,dest,trip_part,within_thr=None,label_data=None,threshold=0.001): + """ + TODO: Tell about each + trip_part: when mock trips are generated, coordinates of this part of + m trips will be within the threshold. trip_part can take one + among the four values: + + 1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie + within the mentioned threshold when trips are generated), + + 2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned + threshold when trips are generated), + + 3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the + mentioned threshold when trips are generated) + + 4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips + will lie within the mentioned threshold when trips are generated) + """ + if label_data == None: + label_data = { + "mode_confirm": ['walk', 'bike', 'transit'], + "purpose_confirm": ['work', 'home', 'school'], + "replaced_mode": ['drive'] + } + + trip =etmm.generate_mock_trips( + user_id="joe", + trips=trips, + origin=org, + destination=dest, + trip_part=trip_part, + label_data=label_data, + within_threshold=within_thr, + threshold=threshold, + ) + return trip + \ No newline at end of file