Skip to content

Commit

Permalink
[TESTED] Explicit clustering method, Improved mock trip generation
Browse files Browse the repository at this point in the history
`od_similarity.py`
1.  Explicitly passing 'origin', 'destination', 'origin-destination' for similarity check  in `similarity`

`similarity_metric.py`
2.  Passing the clustering_way parameter

`greedy_similarity_binning.py`
3.  Since this decision making has moved downstream to `similarity`, removing it from here.

`modellingTestAssets.py`
4. Removing both two-line wrappers (setModelConfig, setTripConfig) from this file, since the tests were parametrised using subTest two commits back.

5. Removed CalDistanceTest. This was introduced to keep the calDistance used by the tests separate from the calDistance used by `greedySimilaritybinning`. Unnecessary.

6.  Using ref. coordinates whenever provided to generate trip coordinates. If not, use randomly generated coordinates as reference points.

7. Receiving and passing origin and destination ref. points in `generate_mock_trips`.

`TestGreedySimilarityBinning.py`

8. removed wrappers for trip and model generation.

9. Using just a single threshold for generating trips and for binning. Removed the two separate thresholds.

`TestSimilarityMetric.py`

10. Removing the implicitness used in binning by passing this as a parameter.
  • Loading branch information
humbleOldSage committed Sep 12, 2023
1 parent c35b7c1 commit f5944cc
Show file tree
Hide file tree
Showing 6 changed files with 168 additions and 167 deletions.
34 changes: 19 additions & 15 deletions emission/analysis/modelling/similarity/od_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,28 @@ class OriginDestinationSimilarity(eamss.SimilarityMetric):
def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
return ctfe.od_features(trip)

def similarity(self, a: List[float], b: List[float]) -> List[float]:
def similarity(self, a: List[float], b: List[float], clustering_way='origin-destination') -> List[float]:
"""
a : a list of point features that can take either of two forms
1. [point1_latitude,point1_longitude]
2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude]
a : a list of point features that takes the forms
[point1_longitude,point1_latitude,point2_longitude,point2_latitude]
b : a list of point features that can take either of two forms
1. [point3_latitude,point3_longitude]
2. [point3_latitude,point3_longitude,point4_latitude,point4_longitude]
It'll always take the same form as parameter a.
b : a list of point features that takes the forms
[point1_longitude,point1_latitude,point2_longitude,point2_latitude]
clustering_way : takes one among 'origin', 'destination', 'origin-destination' as value.
tells the part of the trip to be used for binning trips together if that
part lies within threshold.
return: a list of size 1 ([distance between point1-point3]) if a and b take form 1
or of size 2 ([distance between point1-point3, distance between point2-point4])
if a and b take form 2.
"""

point_dist = [ecc.calDistance(a[i:i+2], b[i:i+2])
for i in range (0,len(a),2)]

return point_dist
origin_dist = ecc.calDistance(a[0:2], b[0:2])
destination_dist=ecc.calDistance(a[2:4], b[2:4])

if clustering_way == 'origin-destination':
return [origin_dist,destination_dist]
elif clustering_way == 'origin':
return [origin_dist]
else:
return [destination_dist]
12 changes: 9 additions & 3 deletions emission/analysis/modelling/similarity/similarity_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,32 @@ def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
pass

@abstractmethod
def similarity(self, a: List[float], b: List[float]) -> List[float]:
def similarity(self, a: List[float], b: List[float], clustering_way = 'origin-destination') -> List[float]:
"""compares the features, producing their similarity
as computed by this similarity metric
:param a: features for a trip
:param b: features for another trip
:param clustering_way : takes one among 'origin', 'destination', 'origin-destination' as value.
tells the part of the trip to be used for binning trips together if that
part lies within a threshold.
:return: for each feature, the similarity of these features
"""
pass

def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
def similar(self, a: List[float], b: List[float], thresh: float, clustering_way= 'origin-destination') -> bool:
"""compares the features, returning true if they are similar
within some threshold
:param a: features for a trip
:param b: features for another trip
:param thresh: threshold for similarity
:param clustering_way : takes one among 'origin', 'destination', 'origin-destination' as value.
tells the part of the trip to be used for binning trips together if that
part lies within a threshold.
:return: true if the feature similarity is within some threshold
"""
similarity_values = self.similarity(a, b)
similarity_values = self.similarity(a, b, clustering_way)
is_similar = all(sim <= thresh for sim in similarity_values)

return is_similar
Original file line number Diff line number Diff line change
Expand Up @@ -212,14 +212,7 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
:return: the id of a bin if a match was found, otherwise None
"""
for bin_id, bin_record in self.bins.items():
if self.clusteringWay == 'origin':
start,end=0,2 #since first two features in trip_features are for origin
elif self.clusteringWay == 'destination':
start,end=2,4 #third and fourth values intrip_features are for destination
elif self.clusteringWay == 'origin-destination':
start,end=0,4 #when clusteromgWay is 'origin-destination',we pass all four features

matches_bin = all([self.metric.similar(trip_features[start:end], bin_sample[start:end], self.sim_thresh)
matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh,self.clusteringWay)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
Expand Down
109 changes: 89 additions & 20 deletions emission/tests/modellingTests/TestGreedySimilarityBinning.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
import emission.tests.modellingTests.utilities as etmu
import emission.tests.modellingTests.modellingTestAssets as etmm

import logging


Expand All @@ -21,11 +22,29 @@ def testNoBinning(self):
"""

# generate $n trips.
n = 20

n = 20
binning_threshold=500
#this generates 20 trips one-by-one, where each trip's respective origin and destination
# points are more than 500m away.
trips = [ etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]


label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}


trips =etmm.generate_mock_trips(
user_id="joe",
trips=n,
trip_part='__',
label_data=label_data,
within_threshold=1,
threshold=binning_threshold,
origin=(0,0),
destination=(1,1)
)

# parameters passed for testing. A list, where each element is one way of clustering
clustering_ways_paramters= ["origin","destination","origin-destination"]
Expand All @@ -34,7 +53,14 @@ def testNoBinning(self):
for cw in clustering_ways_paramters:
with self.subTest(clustering_way=cw):
#initialise the binning model and fit with previously generated trips
model = etmu.setModelConfig("od_similarity", 500, False, cw, False)
model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": binning_threshold, # meters,
"apply_cutoff": False,
"clustering_way": cw,
"incremental_evaluation": False
}
model= eamtg.GreedySimilarityBinning(model_config)
model.fit(trips)
#check each bins for no of trips
no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values()))
Expand All @@ -53,17 +79,38 @@ def testBinning(self):
# within a radius that should have them binned.
n = 20
m = 5
binning_threshold=500
label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}

# parameters passed for testing. A list, where each element of this list takes the form
# [trip part to be sampled within mentioned threshold , clustering way used to check similarity]
parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]
for tp,cw in parameters:
with self.subTest(trip_part=tp,clustering_way=cw):
#generate random trips using utilities
trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
trip_part=tp, within_thr=m)
trips =etmm.generate_mock_trips(
user_id="joe",
trips=n,
trip_part=tp,
label_data=label_data,
within_threshold=m,
threshold=binning_threshold,
origin=(0,0),
destination=(1,1)
)
#initialise the binning model and fit with previously generated trips
model = etmu.setModelConfig("od_similarity", 500, False, cw, False)
model_config = {
"metric": "od_similarity" ,
"similarity_threshold_meters": binning_threshold, # meters,
"apply_cutoff": False,
"clustering_way": cw,
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)
model.fit(trips)
#check each bins for no of trips
at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
Expand All @@ -81,11 +128,24 @@ def testPrediction(self):
}

n = 6
trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
trip_part='od', label_data=label_data,
)
model = etmu.setModelConfig("od_similarity", 500, False, "origin-destination", False)

trips =etmm.generate_mock_trips(
user_id="joe",
trips=n,
trip_part='od',
label_data=label_data,
within_threshold=n,
threshold=500,
origin=(0,0),
destination=(1,1)
)
# Configuration for the binning model under test.
# NOTE: "clustering_way" must be spelled with a hyphen. OriginDestinationSimilarity.similarity
# returns both the origin and the destination distance only for the exact value
# 'origin-destination'; any other value that is not 'origin' (such as the previous
# 'origin_destination') falls through to the destination-only branch, so the test
# would silently exercise the wrong clustering mode.
model_config = {
    "metric": "od_similarity",
    "similarity_threshold_meters": 500,  # meters
    "apply_cutoff": False,
    "clustering_way": "origin-destination",
    "incremental_evaluation": False,
}
model= eamtg.GreedySimilarityBinning(model_config)
train = trips[0:5]
test = trips[5]

Expand All @@ -105,16 +165,25 @@ def testNoPrediction(self):
"replaced_mode": ['crabwalking']
}
n = 5

train = etmu.setTripConfig(trips=n, org=(39.7645187, -104.9951944), # Denver, CO
dest=(39.7435206, -105.2369292), # Golden, CO
trip_part='od', label_data=label_data
binning_threshold = 500
train = etmm.generate_mock_trips( user_id="joe",trips=n, origin=(39.7645187, -104.9951944), # Denver, CO
destination=(39.7435206, -105.2369292), # Golden, CO
trip_part='od', label_data=label_data,
threshold=binning_threshold, within_threshold=n
)
test = etmu.setTripConfig(trips=n, org=(61.1042262, -150.5611644), # Denver, CO
dest=(62.2721466, -150.3233046), # Golden, CO
# Trips to predict on: generated for a different user and around different reference
# coordinates than the `train` trips above.
# NOTE(review): the previous trailing comments said "Denver, CO" / "Golden, CO", but these
# lat/lon values differ from the Denver/Golden coordinates used for `train`; they appear
# to be in Alaska — confirm the intended place names.
test = etmm.generate_mock_trips( user_id="amanda",trips=n, origin=(61.1042262, -150.5611644), # NOTE(review): not Denver, CO; appears to be in Alaska
destination=(62.2721466, -150.3233046), # NOTE(review): not Golden, CO; appears to be in Alaska
trip_part='od', label_data=label_data,
threshold=binning_threshold, within_threshold=n
)
model = etmu.setModelConfig("od_similarity", 500, False, "origin-destination", False)
# Configuration for the binning model under test.
# NOTE: "clustering_way" must be spelled with a hyphen. OriginDestinationSimilarity.similarity
# returns both the origin and the destination distance only for the exact value
# 'origin-destination'; any other value that is not 'origin' (such as the previous
# 'origin_destination') falls through to the destination-only branch, so the test
# would silently exercise the wrong clustering mode.
model_config = {
    "metric": "od_similarity",
    "similarity_threshold_meters": 500,  # meters
    "apply_cutoff": False,
    "clustering_way": "origin-destination",
    "incremental_evaluation": False,
}
model= eamtg.GreedySimilarityBinning(model_config)
model.fit(train)
results, n = model.predict(test[0])

Expand Down
26 changes: 12 additions & 14 deletions emission/tests/modellingTests/TestSimilarityMetric.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import unittest
import emission.analysis.modelling.similarity.od_similarity as eamso
import emission.tests.modellingTests.utilities as etmu

import emission.tests.modellingTests.modellingTestAssets as etmm
class TestSimilarityMetric(unittest.TestCase):

def testODsAreSimilar(self):
generate_points_thresh = 0.001 # approx. 111 meters
similarity_threshold = 500 # in meters
metric = eamso.OriginDestinationSimilarity()

Expand All @@ -16,17 +14,17 @@ def testODsAreSimilar(self):
# a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
# b.destination, we pass the last two values of this list, i.e. from 2 till before 4 index
# c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
parameters= [["od",(0,4)],["_d",(2,4)],["o_",(0,2)]]
parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]

for tp,(coord_start,coord_end) in parameters:
for tp,cw in parameters:
with self.subTest(trip_part=tp):
#generate 2 trips with parameter values
trips = etmu.setTripConfig(2, [0, 0], [1, 1], trip_part=tp,threshold=generate_points_thresh)
trips = etmm.generate_mock_trips('joe',2, threshold=similarity_threshold,origin=[0, 0], destination=[1, 1], within_threshold=2,trip_part=tp)
# depending on the parameters, extract the relevant coordinates
trip0_coords = metric.extract_features(trips[0])[coord_start:coord_end]
trip1_coords = metric.extract_features(trips[1])[coord_start:coord_end]
trip0_coords = metric.extract_features(trips[0])
trip1_coords = metric.extract_features(trips[1])
#check for similarity using relevant coordinates
similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold)
similarOD = metric.similar(trip0_coords,trip1_coords, similarity_threshold,cw)
# Since both origin and destination points lie within threshold limits, they should be similar
# when we check by just origin or just destination or both origin-and-destination
self.assertTrue(similarOD)
Expand All @@ -42,17 +40,17 @@ def testODsAreNotSimilar(self):
# a.origin, we pass first two values of this list,i.e. from 0 till before 2 index
# b.destination, we pass the last two values of this list, i.e. from 2 till before 4 index
# c.origin-destination, we pass the entire list , i.e. from 0 till before 4 index
parameters= [(0,2),(2,4),[0,4]]
parameters= ['origin','destination','origin-destination']
n=2
#this generates 2 trips one-by-one, where each trip's respective origin and destination
# points are more than 500m away.
trips = [etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]
trips = [ etmm.generate_mock_trips('joe',2, origin=[i, i], destination=[i+1, i+1], trip_part= 'od', within_threshold=1,threshold=500)[0] for i in range(n)]
trip0_coord = metric.extract_features(trips[0])
trip1_coord = metric.extract_features(trips[1])

for (coord_start,coord_end) in parameters:
with self.subTest(coordinates=(coord_start,coord_end)):
IsSimilar = metric.similar(trip0_coord[coord_start:coord_end],trip1_coord[coord_start:coord_end], similarity_threshold)
for cw in parameters:
with self.subTest(clustering_way=cw):
IsSimilar = metric.similar(trip0_coord,trip1_coord, similarity_threshold,cw)
# Two trips with neither origin nor destination coordinates within the threshold
# must not be similar by any configuration of similarity testing.
self.assertFalse(IsSimilar)
Expand Down
Loading

0 comments on commit f5944cc

Please sign in to comment.