From 7f6d7542b5561861e5edfd0e27435cb11564078a Mon Sep 17 00:00:00 2001
From: $aTyam <satyam.saini@rutgers.edu>
Date: Tue, 5 Sep 2023 16:02:28 -0400
Subject: [PATCH] Correcting logic behind binning and non-binning while testing

1. Improved the logic based on this review comment: https://github.com/e-mission/e-mission-server/pull/933/commits/710d1a5791212b540b883dedd5613a562071edc9#r1314065502

2. Created a utilities file for repetitive code required by multiple files.

3. Set the clustering threshold back to 500.

4. More in-code comments.
---
 .../TestGreedySimilarityBinning.py            | 505 ++----------------
 .../modellingTests/TestSimilarityMetric.py    | 127 ++---
 emission/tests/modellingTests/utilities.py    |  68 +++
 3 files changed, 165 insertions(+), 535 deletions(-)
 create mode 100644 emission/tests/modellingTests/utilities.py

diff --git a/emission/tests/modellingTests/TestGreedySimilarityBinning.py b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
index 620f2cf99..3e1cd78c2 100644
--- a/emission/tests/modellingTests/TestGreedySimilarityBinning.py
+++ b/emission/tests/modellingTests/TestGreedySimilarityBinning.py
@@ -1,6 +1,6 @@
 import unittest
 import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
-import emission.tests.modellingTests.modellingTestAssets as etmm
+import emission.tests.modellingTests.utilities as etmu
 import logging
 
 
@@ -15,428 +15,60 @@ def testNoBinning(self):
         Tests the three (origin, destination and origin-destination based) 
         binning configuration for trips.
 
-        When both the origin and destination points of trips are outside a threshold
+        When the origin and destination points of trips are outside a threshold
         limit, none of the trips should be binned with the other in any of the three 
         configs (origin, destination or origin-and-destination based).       
         """
 
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
-        # within a radius that should have them binned.
-        n = 20
-        m = 5
-        
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
-
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='__',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
-    
-
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
-        
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",  
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-
-        # Since neither the origin nor the destination of the points generated lie
-        # within the threshold, there should be no binning at all. All the bins should
-        # have size 1.
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-
-
-    def testBinningByOrigin(self):
-        """
-        Tests the 'origin' based binning method for trips.
-
-        When only the origin points of trips are within a threshold
-        limit, trips must be binned together that too if binned based on 
-        'origins', otherwise no binning.       
-        """
-
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
-        # within a radius that should have them binned.
-        n = 20
-        m = 5
-
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
+        # generate $n trips.
+        n = 20     
 
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='o_',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
+        # This generates 20 trips one-by-one, such that the respective origin and destination
+        # points of different trips are more than 500m apart.
+        trips = [ etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]    
 
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
+        # parameters passed for testing. A list, where each element is one way of clustering
+        clustering_ways_paramters= ["origin","destination","origin-destination"]
         
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-        
-
-        # Since only the origin of the points generated lies within the threshold,
-        # there should be binning only when 'origin' config is used. Otherwise all 
-        # the bins should have size 1.
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) ==1, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-
-
-    def testBinningByDestination(self):
-        """
-        Tests the 'destination' based binning method for trips.
-
-        When only the destination points of trips are within a threshold
-        limit, trips must be binned together that too if binned based on 
-        'destination', otherwise no binning.       
+        #Testing each of the three clustering_ways by passing them as parameters
+        for cw in clustering_ways_paramters:
+            with self.subTest(clustering_way=cw):
+                #initialise the binning model and fit with previously generated trips
+                model = etmu.setModelConfig("od_similarity",  500,  False, cw, False)
+                model.fit(trips)
+                #check the number of trips in each bin
+                no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values()))
+                #Since all trips were sampled outside the threshold, there should be no bin
+                # with more than 1 trip
+                self.assertTrue(no_large_bin,"no bin should have more than 1 features in it")
+
+    def testBinning(self):
         """
+        Tests the three (origin, destination and origin-destination based) 
+        binning configuration for trips.
 
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
-        # within a radius that should have them binned.
-        n = 20
-        m = 5
-
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
-
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='_d',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
-
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
-        
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-
-        # Since only the destination of the points generated lies within the threshold,
-        # there should be binning only when 'destination' config is used. Otherwise all 
-        # the bins should have size 1.
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-
-    def testBinningByOriginAndDestination(self):
+        When the points lie within the threshold, the trips are binned together.
         """
-        Tests the 'origin-destination' based binning method for trips.
-
-        When both the origin and destination points of trips are within
-        a threshold limit, trips will be binned together in all three (origin , 
-        destination, origin-and-destinaiton) configurations. 
-        """        
-
-        label_data = {
-            "mode_confirm": ['walk', 'bike', 'transit'],
-            "purpose_confirm": ['work', 'home', 'school'],
-            "replaced_mode": ['drive']
-        }
-
-        # generate $n trips. $m of them should have origin and destinations sampled
+        # generate $n trips. $m of them should have origin sampled
         # within a radius that should have them binned.
         n = 20
         m = 5
 
-        # trip_part: when mock trips are generated, coordinates of this part of 
-        #            m trips will be within the threshold. trip_part can take one
-        #            among the four values:
-        #
-        #            1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie 
-        #             within the mentioned threshold when trips are generated),
-        #
-        #            2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned 
-        #             threshold when trips are generated),
-        #
-        #            3. '_d' ->(destination),meaning ONLY destination of m trips will lie within the 
-        #             mentioned threshold when trips are generated)
-        #
-        #            4. 'od' ->(origin and destination,meaning BOTH origin and destination of m trips
-        #             will lie within the mentioned threshold when trips are generated)
-
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='od',
-            label_data=label_data, 
-            within_threshold=m, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
-        )
-
-        # pass in a test configuration to the binning algorithm.
-        #
-        # clustering_way : Part of the trip used for checking pairwise proximity.
-        #                  Can take one of the three values:
-        #                  
-        #                   1. 'origin' -> using origin of the trip to check if 2 points
-        #                                   lie within the mentioned similarity_threshold_meters
-        #                   2. 'destination' -> using destination of the trip to check if 2 points
-        #                                       lie within the mentioned similarity_threshold_meters
-        #                   3. 'origin-destination' -> both origin and destination of the trip to check 
-        #                                             if 2 points lie within the mentioned 
-        #                                              similarity_threshold_meters
-        
-        model1_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin",
-            "incremental_evaluation": False
-        }
-        model1 = eamtg.GreedySimilarityBinning(model1_config)
-        model1.fit(trips)
-
-
-        model2_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters":111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "destination",
-            "incremental_evaluation": False
-        }
-        model2 = eamtg.GreedySimilarityBinning(model2_config)
-        model2.fit(trips)
-
-
-        model3_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 111,  # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model3 = eamtg.GreedySimilarityBinning(model3_config)
-        model3.fit(trips)
-
-        # Since both the origin and the destination points of the generated trips lie 
-        # within the threshold, there should be binning in all three configs.
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model1.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) ==m, model2.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
-        at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model3.bins.values()))
-        self.assertTrue(at_least_one_large_bin, "no bin should have more than 1 features in it")
-
+        # parameters passed for testing. A list, where each element of this list takes the form 
+        # [trip part to be sampled within mentioned threshold , clustering way used to check similarity]
+        parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]
+        for tp,cw in parameters:
+            with self.subTest(trip_part=tp,clustering_way=cw):
+                #generate random trips using utilities
+                trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
+                                trip_part=tp, within_thr=m)
+                #initialise the binning model and fit with previously generated trips
+                model = etmu.setModelConfig("od_similarity",  500,  False, cw, False)
+                model.fit(trips)
+                #check the number of trips in each bin
+                at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
+                #Since 5 trips were sampled within the threshold, there should be one bin with 5 trips
+                self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
 
     def testPrediction(self):
         """
@@ -449,24 +81,10 @@ def testPrediction(self):
         }
 
         n = 6
-        trips = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(0, 0), 
-            destination=(1, 1),
-            trip_part='od', 
-            label_data=label_data, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
+                                   trip_part='od', label_data=label_data,                                   
         )
-
-        model_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 500,      # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model = eamtg.GreedySimilarityBinning(model_config)
+        model = etmu.setModelConfig("od_similarity",  500,  False, "origin-destination", False)
         
         train = trips[0:5]
         test = trips[5]
@@ -486,36 +104,17 @@ def testNoPrediction(self):
             "purpose_confirm": ['pizza_party'],
             "replaced_mode": ['crabwalking']
         }
-
         n = 5
-        train = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=n, 
-            origin=(39.7645187, -104.9951944),       # Denver, CO
-            destination=(39.7435206, -105.2369292),  # Golden, CO
-            trip_part='od',
-            label_data=label_data, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
+
+        train = etmu.setTripConfig(trips=n, org=(39.7645187, -104.9951944), # Denver, CO
+                                   dest=(39.7435206, -105.2369292),  # Golden, CO
+                                   trip_part='od', label_data=label_data                                 
         )
-        test = etmm.generate_mock_trips(
-            user_id="joe", 
-            trips=1, 
-            origin=(61.1042262, -150.5611644),       # Anchorage, AK
-            destination=(62.2721466, -150.3233046),  # Talkeetna, AK
-            trip_part='od',
-            label_data=label_data, 
-            threshold=0.001,  # ~ 111 meters in degrees WGS84
+        test = etmu.setTripConfig(trips=n, org=(61.1042262, -150.5611644), # Anchorage, AK
+                                   dest=(62.2721466, -150.3233046),  # Talkeetna, AK
+                                   trip_part='od', label_data=label_data,                                   
         )
-
-        model_config = {
-            "metric": "od_similarity",
-            "similarity_threshold_meters": 500,      # meters,
-            "apply_cutoff": False,
-            "clustering_way": "origin-destination",
-            "incremental_evaluation": False
-        }
-        model = eamtg.GreedySimilarityBinning(model_config)
-
+        model = etmu.setModelConfig("od_similarity",  500,  False, "origin-destination", False)
         model.fit(train)
         results, n = model.predict(test[0])
 
diff --git a/emission/tests/modellingTests/TestSimilarityMetric.py b/emission/tests/modellingTests/TestSimilarityMetric.py
index f7c7b195f..cbe500b23 100644
--- a/emission/tests/modellingTests/TestSimilarityMetric.py
+++ b/emission/tests/modellingTests/TestSimilarityMetric.py
@@ -1,98 +1,61 @@
 import unittest
-import emission.tests.modellingTests.modellingTestAssets as etmm
 import emission.analysis.modelling.similarity.od_similarity as eamso
+import emission.tests.modellingTests.utilities as etmu
 
 class TestSimilarityMetric(unittest.TestCase):
 
     def testODsAreSimilar(self):
         generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 111  # 
-
+        similarity_threshold = 500  # in meters
         metric = eamso.OriginDestinationSimilarity()
-        ## Sub-Test 1 - 3 :
-        # random, but, origin and destination points are sampled within a circle and should always be < sim threshold
-        # Since both origin and destination poitns lie within threshold limits,they should be similar
-        # when we check by just origin or just destination or both origin-and-destination
-
-        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], 'od',threshold=generate_points_thresh) 
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])        
-        similarOD1 = metric.similar(coords0, coords1, similarity_threshold)
-        similarOD2 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
-        similarOD3 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
-
-        ## Sub-Test 4 :
-        # random, but, only origin points are sampled within a circle and should always be < sim threshold
-        # Since origin of two points lies within threshold limits,they should be similar
-        # when we check just origin for similarity.
-
-
-        trips = etmm.generate_mock_trips('alice', 2, [0, 0], [1, 1], 'o_',threshold=generate_points_thresh)        
-        coords0 = metric.extract_features(trips[0])[:2]
-        coords1 = metric.extract_features(trips[1])[:2]        
-        similarO = metric.similar(coords0, coords1, similarity_threshold)
-
-        ##Sub-Test 5 :
-        # random, but, only destination points are sampled within a circle and should always be < sim threshold
-        # Since destination of two points lies within threshold limits,they should be similar
-        # when we check just destination for similarity.
-
-        trips = etmm.generate_mock_trips('Caty', 2, [0, 0], [1, 1], '_d',threshold=generate_points_thresh)        
-        coords0 = metric.extract_features(trips[0])[2:]
-        coords1 = metric.extract_features(trips[1])[2:]        
-        similarD = metric.similar(coords0, coords1, similarity_threshold)
 
-        # All the similars must be true
-        self.assertTrue(similarOD1) # RESULT SUB-TEST 1
-        self.assertTrue(similarOD2) # RESULT SUB-TEST 2
-        self.assertTrue(similarOD3) # RESULT SUB-TEST 3
-        self.assertTrue(similarO)  # RESULT SUB-TEST 4
-        self.assertTrue(similarD) # RESULT SUB-TEST 5
+        # parameters passed for testing is set here. A list, where each element of this list takes the form 
+        # [trip part to be sampled within mentioned threshold, (start_coord,end_coord)]
+        # Since the extracted_features function returns in the form [origin_lat,origin_long,destination_lat,destination_long],
+        # if clustering is to be done by :
+        #   a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
+        #   b.destination, we pass the last two values of this list,i.e. from 2 till before 4 index
+        #   c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
+        parameters= [["od",(0,4)],["_d",(2,4)],["o_",(0,2)]]
+
+        for tp,(coord_start,coord_end) in parameters:
+            with self.subTest(trip_part=tp):
+                #generate 2 trips with parameter values
+                trips = etmu.setTripConfig(2, [0, 0], [1, 1], trip_part=tp,threshold=generate_points_thresh) 
+                # depending on the parameters, extract the relevant coordinates
+                trip0_coords = metric.extract_features(trips[0])[coord_start:coord_end]
+                trip1_coords = metric.extract_features(trips[1])[coord_start:coord_end]
+                #check for similarity using relevant coordinates
+                similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold)
+                # Since the sampled part of the trips (origin, destination or both) lies within the
+                # threshold limits, the trips should be similar when compared on that same part
+                self.assertTrue(similarOD)
     
     def testODsAreNotSimilar(self):
-        generate_points_thresh = 0.001  # approx. 111 meters
-        similarity_threshold = 111  # 
-        metric = eamso.OriginDestinationSimilarity()
-
-        ## Sub-Test 1-2: 
-        # Two trips with neither origin nor destination coordinates within threshold
-        # must not be similar in any configuration of similarity testing.
-        trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], '__', threshold=generate_points_thresh)  
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar11 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
-        similar12 = metric.similar(coords0[2:], coords1[:], similarity_threshold)
-
-        ## Sub-Test 3-4: 
-        # Two trips with  origin coordinates within threshold but we check  
-        # similarity using destination coordinates or origin-and-destination
-        # should not be similar.
-        trips = etmm.generate_mock_trips('Alice', 2, [2, 2], [3, 3], 'o_', threshold=generate_points_thresh)
+        similarity_threshold = 500
         metric = eamso.OriginDestinationSimilarity()
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar21 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
-        similar22 = metric.similar(coords0, coords1, similarity_threshold)
-
-        ## Sub-Test 5-6: 
-        # Two trips with destination coordinates within threshold but we check 
-        # similarity using origin coordinates or origin-and-destination 
-        # should not be similar.        
-        trips = etmm.generate_mock_trips('Caty', 2, [3, 3], [4, 4], '_d', threshold=generate_points_thresh)
-        metric = eamso.OriginDestinationSimilarity()
-        coords0 = metric.extract_features(trips[0])
-        coords1 = metric.extract_features(trips[1])
-        similar31 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
-        similar32 = metric.similar(coords0, coords1, similarity_threshold)
-
-        # All the similars must be False
-        self.assertFalse(similar11) # RESULT SUB-TEST 1
-        self.assertFalse(similar12) # RESULT SUB-TEST 2
-        self.assertFalse(similar21) # RESULT SUB-TEST 3
-        self.assertFalse(similar22) # RESULT SUB-TEST 4
-        self.assertFalse(similar31) # RESULT SUB-TEST 5
-        self.assertFalse(similar32) # RESULT SUB-TEST 6
 
+        # parameters passed for testing is set. A list, where each element of this list takes the form 
+        # [(start_coord,end_coord)]
+        # Since the extracted_features function return in the form [origin_lat,origin_long,destination_lat,destination_long],
+        # if clustering shouldn't happen, then
+        #   a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
+        #   b.destination, we pass the last two values of this list,i.e. from 2 till before 4 index
+        #   c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
+        parameters= [(0,2),(2,4),[0,4]]
+        n=2
+        # This generates 2 trips one-by-one, such that the respective origin and destination
+        # points of the two trips are more than 500m apart.
+        trips = [etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]    
+        trip0_coord = metric.extract_features(trips[0])
+        trip1_coord = metric.extract_features(trips[1])
+
+        for (coord_start,coord_end) in parameters:
+            with self.subTest(coordinates=(coord_start,coord_end)):      
+                IsSimilar = metric.similar(trip0_coord[coord_start:coord_end],trip1_coord[coord_start:coord_end], similarity_threshold)
+                # Two trips with neither origin nor destination coordinates within the threshold
+                # must not be similar by any configuration of similarity testing.
+                self.assertFalse(IsSimilar)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/emission/tests/modellingTests/utilities.py b/emission/tests/modellingTests/utilities.py
new file mode 100644
index 000000000..9f03358bb
--- /dev/null
+++ b/emission/tests/modellingTests/utilities.py
@@ -0,0 +1,68 @@
+import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
+import emission.tests.modellingTests.modellingTestAssets as etmm
+
+def setModelConfig(metric,threshold,cutoff,clustering_way,incrementalevaluation):
+    """
+    Build a GreedySimilarityBinning model from a test configuration.
+
+    metric : stored under the "metric" key of the model config.
+    threshold : stored as "similarity_threshold_meters" (meters).
+    cutoff : stored as "apply_cutoff".
+    clustering_way : part of the trip used for checking pairwise proximity.
+        Can take one of the three values:
+        1. 'origin' -> using origin of the trip to check if 2 points
+           lie within the mentioned similarity_threshold_meters
+        2. 'destination' -> using destination of the trip to check if 2 points
+           lie within the mentioned similarity_threshold_meters
+        3. 'origin-destination' -> both origin and destination of the trip to
+           check if 2 points lie within the mentioned similarity_threshold_meters
+    incrementalevaluation : stored as "incremental_evaluation".
+    Returns the eamtg.GreedySimilarityBinning instance built from this config.
+    """
+    return eamtg.GreedySimilarityBinning({
+        "metric": metric,
+        "similarity_threshold_meters": threshold,  # meters
+        "apply_cutoff": cutoff,
+        "clustering_way": clustering_way,
+        "incremental_evaluation": incrementalevaluation,
+    })
+
+
+def setTripConfig(trips,org,dest,trip_part,within_thr=None,label_data=None,threshold=0.001):
+    """
+    Generate mock trips for user "joe" via etmm.generate_mock_trips.
+
+    trips, org, dest : forwarded as trips, origin and destination.
+    within_thr, threshold : forwarded as within_threshold and threshold.
+    label_data : forwarded as label_data; when None, the default lists below are used.
+    trip_part : when mock trips are generated, coordinates of this part of
+        m trips will be within the threshold. trip_part can take one
+        among the four values:
+        1. '__' ->(None, meaning NEITHER origin nor destination of any trip will lie
+           within the mentioned threshold when trips are generated),
+        2. 'o_' ->(origin, meaning ONLY origin of m trips will lie within the mentioned
+           threshold when trips are generated),
+        3. '_d' ->(destination, meaning ONLY destination of m trips will lie within the
+           mentioned threshold when trips are generated)
+        4. 'od' ->(origin and destination, meaning BOTH origin and destination of m trips
+           will lie within the mentioned threshold when trips are generated)
+    Returns whatever etmm.generate_mock_trips returns for these arguments.
+    """
+    if label_data is None:
+        label_data = {
+            "mode_confirm": ['walk', 'bike', 'transit'],
+            "purpose_confirm": ['work', 'home', 'school'],
+            "replaced_mode": ['drive']
+        }
+
+    return etmm.generate_mock_trips(
+        user_id="joe",
+        trips=trips,
+        origin=org,
+        destination=dest,
+        trip_part=trip_part,
+        label_data=label_data,
+        within_threshold=within_thr,
+        threshold=threshold,
+    )
+    
\ No newline at end of file