Testing upgraded Similarity functionality

Checks that `Similarity` behaves as expected when lists of size 2 (only origin OR only destination) or size 4 (origin AND destination) are passed.
e-mission · Aug 31, 2023 · 6d9ea77 · 6d9ea77
1 parent 710d1a5
commit 6d9ea77
Show file tree

Hide file tree

Showing 5 changed files with 92 additions and 15 deletions.
diff --git a/emission/tests/modellingTests/TestBackwardsCompat.py b/emission/tests/modellingTests/TestBackwardsCompat.py
@@ -59,6 +59,7 @@ def testAnyVsAllWhilePredicting(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 16000,      # meters,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
         new_builder = eamtg.GreedySimilarityBinning(model_config)
@@ -96,6 +97,7 @@ def testRandomTripsWithinTheSameThreshold(self):
             trips=n, 
             origin=(0, 0), 
             destination=(1, 1), 
+            trip_part='od',
             label_data=label_data, 
             threshold=0.001,  # ~ 111 meters in degrees WGS84
         )
@@ -113,6 +115,7 @@ def testRandomTripsWithinTheSameThreshold(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,      # meters,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
         new_model = eamtg.GreedySimilarityBinning(model_config)
@@ -156,6 +159,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
             trips=n, 
             origin=(0, 0), 
             destination=(1, 1), 
+            trip_part='od', 
             label_data=label_data, 
             threshold=0.1,  # Much bigger than the 500m threshold, so we will get multiple bins
         )
@@ -173,6 +177,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,      # meters,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
         new_model = eamtg.GreedySimilarityBinning(model_config)

diff --git a/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py b/emission/tests/modellingTests/TestRunGreedyIncrementalModel.py
@@ -44,6 +44,7 @@ def setUp(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": sim_threshold,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": True
         }
 
@@ -162,6 +163,7 @@ def testIncrementalRun(self):
             trips=self.new_trips_per_invocation,
             origin=self.origin,
             destination=self.destination,
+            trip_part='od',
             label_data=label_data,
             threshold=0.0001, # ~10m,
             start_ts=time.time() - 20,

diff --git a/emission/tests/modellingTests/TestRunGreedyModel.py b/emission/tests/modellingTests/TestRunGreedyModel.py
@@ -62,6 +62,7 @@ def setUp(self):
                 trips=self.total_trips,
                 origin=self.origin,
                 destination=self.destination,
+                trip_part='od',
                 label_data=label_data,
                 within_threshold=self.clustered_trips,  
                 threshold=0.004, # ~400m
@@ -106,6 +107,7 @@ def testTrainGreedyModelWithZeroTrips(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
 
@@ -142,6 +144,7 @@ def test1RoundTripGreedySimilarityBinning(self):
             "metric": "od_similarity",
             "similarity_threshold_meters": 500,
             "apply_cutoff": False,
+            "clustering_way": 'origin-destination',
             "incremental_evaluation": False
         }
 

diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py
@@ -6,26 +6,93 @@ class TestSimilarityMetric(unittest.TestCase):
 
     def testODsAreSimilar(self):
         generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 500  # 
-        # random, but, points are sampled within a circle and should always be < sim threshold
-        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], threshold=generate_points_thresh)
+        similarity_threshold = 111  # 
+
         metric = eamso.OriginDestinationSimilarity()
+        ## Sub-Test 1 - 3 :
+        # Random, but both origin and destination points are sampled within a circle and should always be < sim threshold.
+        # Since both origin and destination points lie within the threshold limits, the trips should be similar
+        # whether we check by just origin, just destination, or both origin-and-destination.
+
+        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], 'od',threshold=generate_points_thresh) 
         coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar = metric.similar(coords0, coords1, similarity_threshold)
-        self.assertTrue(similar)
+        coords1 = metric.extract_features(trips[1])        
+        similarOD1 = metric.similar(coords0, coords1, similarity_threshold)
+        similarOD2 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
+        similarOD3 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
+
+        ## Sub-Test 4 :
+        # Random, but only origin points are sampled within a circle and should always be < sim threshold.
+        # Since the origins of the two trips lie within the threshold limits, the trips should be similar
+        # when we check just the origin for similarity.
+
+
+        trips = etmm.generate_mock_trips('alice', 2, [0, 0], [1, 1], 'o_',threshold=generate_points_thresh)        
+        coords0 = metric.extract_features(trips[0])[:2]
+        coords1 = metric.extract_features(trips[1])[:2]        
+        similarO = metric.similar(coords0, coords1, similarity_threshold)
+
+        ##Sub-Test 5 :
+        # Random, but only destination points are sampled within a circle and should always be < sim threshold.
+        # Since the destinations of the two trips lie within the threshold limits, the trips should be similar
+        # when we check just the destination for similarity.
+
+        trips = etmm.generate_mock_trips('Caty', 2, [0, 0], [1, 1], '_d',threshold=generate_points_thresh)        
+        coords0 = metric.extract_features(trips[0])[2:]
+        coords1 = metric.extract_features(trips[1])[2:]        
+        similarD = metric.similar(coords0, coords1, similarity_threshold)
+
+        # All the similars must be true
+        self.assertTrue(similarOD1) # RESULT SUB-TEST 1
+        self.assertTrue(similarOD2) # RESULT SUB-TEST 2
+        self.assertTrue(similarOD3) # RESULT SUB-TEST 3
+        self.assertTrue(similarO)  # RESULT SUB-TEST 4
+        self.assertTrue(similarD) # RESULT SUB-TEST 5
 
     def testODsAreNotSimilar(self):
         generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 500  # 
-
-        trips0 = etmm.generate_mock_trips('bob', 1, [0, 0], [1, 1], threshold=generate_points_thresh)
-        trips1 = etmm.generate_mock_trips('alice', 1, [2, 2], [3, 3], threshold=generate_points_thresh)
+        similarity_threshold = 111  # 
+        metric = eamso.OriginDestinationSimilarity()
+
+        ## Sub-Test 1-2: 
+        # Two trips with neither origin nor destination coordinates within threshold
+        # must not be similar in any configuration of similarity testing.
+        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], '__', threshold=generate_points_thresh)  
+        coords0 = metric.extract_features(trips[0])
+        coords1 = metric.extract_features(trips[1])
+        similar11 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
+        similar12 = metric.similar(coords0[2:], coords1[:], similarity_threshold)
+
+        ## Sub-Test 3-4: 
+        # Two trips with origin coordinates within threshold, but we check
+        # similarity using destination coordinates or origin-and-destination,
+        # so they should not be similar.
+        trips = etmm.generate_mock_trips('Alice', 2, [2, 2], [3, 3], 'o_', threshold=generate_points_thresh)
         metric = eamso.OriginDestinationSimilarity()
-        coords0 = metric.extract_features(trips0[0])
-        coords1 = metric.extract_features(trips1[0])
-        similar = metric.similar(coords0, coords1, similarity_threshold)
-        self.assertFalse(similar)
+        coords0 = metric.extract_features(trips[0])
+        coords1 = metric.extract_features(trips[1])
+        similar21 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
+        similar22 = metric.similar(coords0, coords1, similarity_threshold)
+
+        ## Sub-Test 5-6: 
+        # Two trips with destination coordinates within threshold, but we check
+        # similarity using origin coordinates or origin-and-destination,
+        # so they should not be similar.
+        trips = etmm.generate_mock_trips('Caty', 2, [3, 3], [4, 4], '_d', threshold=generate_points_thresh)
+        metric = eamso.OriginDestinationSimilarity()
+        coords0 = metric.extract_features(trips[0])
+        coords1 = metric.extract_features(trips[1])
+        similar31 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
+        similar32 = metric.similar(coords0, coords1, similarity_threshold)
+
+        # All the similars must be False
+        self.assertFalse(similar11) # RESULT SUB-TEST 1
+        self.assertFalse(similar12) # RESULT SUB-TEST 2
+        self.assertFalse(similar21) # RESULT SUB-TEST 3
+        self.assertFalse(similar22) # RESULT SUB-TEST 4
+        self.assertFalse(similar31) # RESULT SUB-TEST 5
+        self.assertFalse(similar32) # RESULT SUB-TEST 6
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/emission/tests/modellingTests/modellingTestAssets.py b/emission/tests/modellingTests/modellingTestAssets.py
@@ -211,6 +211,6 @@ def generate_mock_trips(
         "purpose_confirm": ['work', 'home', 'school'],
         "replaced_mode": ['walk', 'bike', 'drive']
     }
-    result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1], label_data, 6)
+    result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1],'od', label_data, 6)
     for r in result:
         print(r)