Skip to content

Commit

Permalink
Testing upgraded Similarity functionality
Browse files Browse the repository at this point in the history
Checking that `Similarity` behaves as expected when a list of size 2 (for only origin OR only destination) or of size 4 (for origin AND destination) is passed.
  • Loading branch information
humbleOldSage committed Aug 31, 2023
1 parent 710d1a5 commit 6d9ea77
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 15 deletions.
5 changes: 5 additions & 0 deletions emission/tests/modellingTests/TestBackwardsCompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def testAnyVsAllWhilePredicting(self):
"metric": "od_similarity",
"similarity_threshold_meters": 16000, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_builder = eamtg.GreedySimilarityBinning(model_config)
Expand Down Expand Up @@ -96,6 +97,7 @@ def testRandomTripsWithinTheSameThreshold(self):
trips=n,
origin=(0, 0),
destination=(1, 1),
trip_part='od',
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
)
Expand All @@ -113,6 +115,7 @@ def testRandomTripsWithinTheSameThreshold(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_model = eamtg.GreedySimilarityBinning(model_config)
Expand Down Expand Up @@ -156,6 +159,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
trips=n,
origin=(0, 0),
destination=(1, 1),
trip_part='od',
label_data=label_data,
threshold=0.1, # Much bigger than the 500m threshold, so we will get multiple bins
)
Expand All @@ -173,6 +177,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_model = eamtg.GreedySimilarityBinning(model_config)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def setUp(self):
"metric": "od_similarity",
"similarity_threshold_meters": sim_threshold,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": True
}

Expand Down Expand Up @@ -162,6 +163,7 @@ def testIncrementalRun(self):
trips=self.new_trips_per_invocation,
origin=self.origin,
destination=self.destination,
trip_part='od',
label_data=label_data,
threshold=0.0001, # ~10m,
start_ts=time.time() - 20,
Expand Down
3 changes: 3 additions & 0 deletions emission/tests/modellingTests/TestRunGreedyModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def setUp(self):
trips=self.total_trips,
origin=self.origin,
destination=self.destination,
trip_part='od',
label_data=label_data,
within_threshold=self.clustered_trips,
threshold=0.004, # ~400m
Expand Down Expand Up @@ -106,6 +107,7 @@ def testTrainGreedyModelWithZeroTrips(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}

Expand Down Expand Up @@ -142,6 +144,7 @@ def test1RoundTripGreedySimilarityBinning(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}

Expand Down
95 changes: 81 additions & 14 deletions emission/tests/modellingTests/TestSimilarityMetric.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,93 @@ class TestSimilarityMetric(unittest.TestCase):

def testODsAreSimilar(self):
generate_points_thresh = 0.001 # approx. 111 meters
similarity_threshold = 500 #
# random, but, points are sampled within a circle and should always be < sim threshold
trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], threshold=generate_points_thresh)
similarity_threshold = 111 #

metric = eamso.OriginDestinationSimilarity()
## Sub-Test 1 - 3 :
# Random, but origin and destination points are sampled within a circle and should always be < sim threshold.
# Since both origin and destination points lie within the threshold limits, the trips should be similar
# when we check by just origin, just destination, or both origin-and-destination.

trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], 'od',threshold=generate_points_thresh)
coords0 = metric.extract_features(trips[0])
coords1 = metric.extract_features(trips[1])
similar = metric.similar(coords0, coords1, similarity_threshold)
self.assertTrue(similar)
coords1 = metric.extract_features(trips[1])
similarOD1 = metric.similar(coords0, coords1, similarity_threshold)
similarOD2 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
similarOD3 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)

## Sub-Test 4 :
# Random, but only origin points are sampled within a circle and should always be < sim threshold.
# Since the origins of the two trips lie within the threshold limits, the trips should be similar
# when we check just the origin for similarity.


trips = etmm.generate_mock_trips('alice', 2, [0, 0], [1, 1], 'o_',threshold=generate_points_thresh)
coords0 = metric.extract_features(trips[0])[:2]
coords1 = metric.extract_features(trips[1])[:2]
similarO = metric.similar(coords0, coords1, similarity_threshold)

##Sub-Test 5 :
# Random, but only destination points are sampled within a circle and should always be < sim threshold.
# Since the destinations of the two trips lie within the threshold limits, the trips should be similar
# when we check just the destination for similarity.

trips = etmm.generate_mock_trips('Caty', 2, [0, 0], [1, 1], '_d',threshold=generate_points_thresh)
coords0 = metric.extract_features(trips[0])[2:]
coords1 = metric.extract_features(trips[1])[2:]
similarD = metric.similar(coords0, coords1, similarity_threshold)

# All the similars must be true
self.assertTrue(similarOD1) # RESULT SUB-TEST 1
self.assertTrue(similarOD2) # RESULT SUB-TEST 2
self.assertTrue(similarOD3) # RESULT SUB-TEST 3
self.assertTrue(similarO) # RESULT SUB-TEST 4
self.assertTrue(similarD) # RESULT SUB-TEST 5

def testODsAreNotSimilar(self):
generate_points_thresh = 0.001 # approx. 111 meters
similarity_threshold = 500 #

trips0 = etmm.generate_mock_trips('bob', 1, [0, 0], [1, 1], threshold=generate_points_thresh)
trips1 = etmm.generate_mock_trips('alice', 1, [2, 2], [3, 3], threshold=generate_points_thresh)
similarity_threshold = 111 #
metric = eamso.OriginDestinationSimilarity()

## Sub-Test 1-2:
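# Two trips with neither origin nor destination coordinates within the threshold
# must not be similar in any configuration of similarity testing.
# NOTE(review): the second check below passes coords0[2:] against coords1[:] (the full
# feature list); this looks like a typo for coords1[2:] (destination only) - confirm.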
trips = etmm.generate_mock_trips('bob', 2, [0, 0], [1, 1], '__', threshold=generate_points_thresh)
coords0 = metric.extract_features(trips[0])
coords1 = metric.extract_features(trips[1])
similar11 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
similar12 = metric.similar(coords0[2:], coords1[:], similarity_threshold)

## Sub-Test 3-4:
# Two trips whose origin coordinates are within the threshold should not be
# similar when we check similarity using only the destination coordinates
# or using origin-and-destination.
trips = etmm.generate_mock_trips('Alice', 2, [2, 2], [3, 3], 'o_', threshold=generate_points_thresh)
metric = eamso.OriginDestinationSimilarity()
coords0 = metric.extract_features(trips0[0])
coords1 = metric.extract_features(trips1[0])
similar = metric.similar(coords0, coords1, similarity_threshold)
self.assertFalse(similar)
coords0 = metric.extract_features(trips[0])
coords1 = metric.extract_features(trips[1])
similar21 = metric.similar(coords0[2:], coords1[2:], similarity_threshold)
similar22 = metric.similar(coords0, coords1, similarity_threshold)

## Sub-Test 5-6:
# Two trips whose destination coordinates are within the threshold should not be
# similar when we check similarity using only the origin coordinates
# or using origin-and-destination.
trips = etmm.generate_mock_trips('Caty', 2, [3, 3], [4, 4], '_d', threshold=generate_points_thresh)
metric = eamso.OriginDestinationSimilarity()
coords0 = metric.extract_features(trips[0])
coords1 = metric.extract_features(trips[1])
similar31 = metric.similar(coords0[:2], coords1[:2], similarity_threshold)
similar32 = metric.similar(coords0, coords1, similarity_threshold)

# All the similars must be False
self.assertFalse(similar11) # RESULT SUB-TEST 1
self.assertFalse(similar12) # RESULT SUB-TEST 2
self.assertFalse(similar21) # RESULT SUB-TEST 3
self.assertFalse(similar22) # RESULT SUB-TEST 4
self.assertFalse(similar31) # RESULT SUB-TEST 5
self.assertFalse(similar32) # RESULT SUB-TEST 6


if __name__ == '__main__':
unittest.main()
2 changes: 1 addition & 1 deletion emission/tests/modellingTests/modellingTestAssets.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,6 @@ def generate_mock_trips(
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['walk', 'bike', 'drive']
}
result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1], label_data, 6)
result = generate_mock_trips('joe-bob', 14, [0, 0], [1,1],'od', label_data, 6)
for r in result:
print(r)

0 comments on commit 6d9ea77

Please sign in to comment.