Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moving dependence from custom branch's tour_model to master's trip_model #933

Merged
merged 10 commits into from
Sep 14, 2023
23 changes: 20 additions & 3 deletions emission/analysis/modelling/similarity/od_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,23 @@ def extract_features(self, trip: ecwc.Confirmedtrip) -> List[float]:
return ctfe.od_features(trip)

def similarity(self, a: List[float], b: List[float]) -> List[float]:
o_dist = ecc.calDistance([a[0], a[1]], [b[0], b[1]])
d_dist = ecc.calDistance([a[2], a[3]], [b[2], b[3]])
return [o_dist, d_dist]
"""
a : a list of point features that can take either of two forms
1. [point1_latitude,point1_longitude]
2. [point1_latitude,point1_longitude,point2_latitude,point2_longitude]

b : a list of point features that can take either of two forms
1. [point3_latitude,point3_longitude]
2. [point3_latitude,point3_longitude,point4_latitude,point4_longitude]

It'll always take the same form as parameter a.

return: a list of size 1 ([distance between point1-point3]) if a and b take form 1
or of size 2 ([distance between point1-point3, distance between point2-point4])
if a and b take form 2.
"""

point_dist = [ecc.calDistance(a[i:i+2], b[i:i+2])
shankari marked this conversation as resolved.
Show resolved Hide resolved
for i in range (0,len(a),2)]

return point_dist
5 changes: 3 additions & 2 deletions emission/analysis/modelling/similarity/similarity_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,12 @@ def similar(self, a: List[float], b: List[float], thresh: float) -> bool:
"""compares the features, returning true if they are similar
within some threshold

:param a: features for a trip
:param a: features for a trip
:param b: features for another trip
:param thresh: threshold for similarity
:return: true if the feature similarity is within some threshold
"""
similarity_values = self.similarity(a, b)
shankari marked this conversation as resolved.
Show resolved Hide resolved
is_similar = all(map(lambda sim: sim <= thresh, similarity_values))
shankari marked this conversation as resolved.
Show resolved Hide resolved
is_similar = all(sim <= thresh for sim in similarity_values)
shankari marked this conversation as resolved.
Show resolved Hide resolved

return is_similar
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ class label to apply:
self.sim_thresh = config['similarity_threshold_meters']
self.apply_cutoff = config['apply_cutoff']
self.is_incremental = config['incremental_evaluation']
if config.get('clustering_way') is None:
shankari marked this conversation as resolved.
Show resolved Hide resolved
self.clusteringWay='origin-destination' # previous default
else:
self.clusteringWay= config['clustering_way']
self.tripLabels=[]

self.bins: Dict[str, Dict] = {}

Expand Down Expand Up @@ -184,9 +189,11 @@ def _assign_bins(self, trips: List[ecwc.Confirmedtrip]):
logging.debug(f"adding trip to bin {bin_id} with features {trip_features}")
self.bins[bin_id]['feature_rows'].append(trip_features)
self.bins[bin_id]['labels'].append(trip_labels)
self.tripLabels.append(bin_id)
else:
# create new bin
new_bin_id = str(len(self.bins))
self.tripLabels.append(new_bin_id)
new_bin_record = {
'feature_rows': [trip_features],
'labels': [trip_labels],
Expand All @@ -200,14 +207,22 @@ def _find_matching_bin_id(self, trip_features: List[float]) -> Optional[str]:
finds an existing bin where all bin features are "similar" to the incoming
trip features.

:param trip_features: feature row for the incoming trip
:param trip_features: feature row for the incoming trip.
takes the form [orig_lat, orig_lon, dest_lat, dest_lon]
:return: the id of a bin if a match was found, otherwise None
"""
for bin_id, bin_record in self.bins.items():
matches_bin = all([self.metric.similar(trip_features, bin_sample, self.sim_thresh)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
if self.clusteringWay == 'origin':
shankari marked this conversation as resolved.
Show resolved Hide resolved
start,end=0,2 #since first two features in trip_features are for origin
elif self.clusteringWay == 'destination':
start,end=2,4 #third and fourth values in trip_features are for destination
elif self.clusteringWay == 'origin-destination':
start,end=0,4 #when clusteringWay is 'origin-destination', we pass all four features

matches_bin = all([self.metric.similar(trip_features[start:end], bin_sample[start:end], self.sim_thresh)
for bin_sample in bin_record['feature_rows']])
if matches_bin:
return bin_id
return None

def _nearest_bin(self, trip: ecwc.Confirmedtrip) -> Tuple[Optional[int], Optional[Dict]]:
Expand Down
5 changes: 5 additions & 0 deletions emission/tests/modellingTests/TestBackwardsCompat.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def testAnyVsAllWhilePredicting(self):
"metric": "od_similarity",
"similarity_threshold_meters": 16000, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_builder = eamtg.GreedySimilarityBinning(model_config)
Expand Down Expand Up @@ -96,6 +97,7 @@ def testRandomTripsWithinTheSameThreshold(self):
trips=n,
origin=(0, 0),
destination=(1, 1),
trip_part='od',
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
)
Expand All @@ -113,6 +115,7 @@ def testRandomTripsWithinTheSameThreshold(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_model = eamtg.GreedySimilarityBinning(model_config)
Expand Down Expand Up @@ -156,6 +159,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
trips=n,
origin=(0, 0),
destination=(1, 1),
trip_part='od',
label_data=label_data,
threshold=0.1, # Much bigger than the 500m threshold, so we will get multiple bins
)
Expand All @@ -173,6 +177,7 @@ def testRandomTripsOutsideTheSameThreshold(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}
new_model = eamtg.GreedySimilarityBinning(model_config)
Expand Down
135 changes: 64 additions & 71 deletions emission/tests/modellingTests/TestGreedySimilarityBinning.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import unittest
import emission.analysis.modelling.trip_model.greedy_similarity_binning as eamtg
import emission.tests.modellingTests.modellingTestAssets as etmm
import emission.tests.modellingTests.utilities as etmu
import logging


Expand All @@ -10,44 +10,65 @@ def setUp(self) -> None:
logging.basicConfig(format='%(asctime)s:%(levelname)s:%(message)s',
level=logging.DEBUG)

def testBinning(self):
def testNoBinning(self):
"""
when $should_be_grouped trips are the same, they should appear in a bin
Tests the three (origin, destination and origin-destination based)
binning configuration for trips.

When the origin and destination points of trips are outside a threshold
limit, none of the trips should be binned with the other in any of the three
configs (origin, destination or origin-and-destination based).
"""
label_data = {
"mode_confirm": ['walk', 'bike', 'transit'],
"purpose_confirm": ['work', 'home', 'school'],
"replaced_mode": ['drive']
}

# generate $n trips. $m of them should have origin and destinations sampled
# generate $n trips.
n = 20

#this generates 20 trips one-by-one, where each trip's respective origin and destination
# points are more than 500m away.
trips = [ etmu.setTripConfig(1, (i, i), (i+1, i+1), 'od', 1)[0] for i in range(n)]

shankari marked this conversation as resolved.
Show resolved Hide resolved
# parameters passed for testing. A list, where each element is one way of clustering
clustering_ways_paramters= ["origin","destination","origin-destination"]

#Testing each of the three clustering_ways by passing them as parameters
for cw in clustering_ways_paramters:
with self.subTest(clustering_way=cw):
#initialise the binning model and fit with previously generated trips
model = etmu.setModelConfig("od_similarity", 500, False, cw, False)
model.fit(trips)
#check the number of trips in each bin
no_large_bin = all(map(lambda b: len(b['feature_rows']) == 1, model.bins.values()))
#Since all trips were sampled outside the threshold, there should be no bin
# with more than 1 trip
self.assertTrue(no_large_bin,"no bin should have more than 1 features in it")

def testBinning(self):
"""
Tests the three (origin, destination and origin-destination based)
binning configuration for trips.

When the points lie within the threshold, the trips are binned together.
"""
# generate $n trips. $m of them should have origin sampled
# within a radius that should have them binned.
n = 20
m = 5
trips = etmm.generate_mock_trips(
user_id="joe",
trips=n,
origin=(0, 0),
destination=(1, 1),
label_data=label_data,
within_threshold=m,
threshold=0.001, # ~ 111 meters in degrees WGS84
)

# pass in a test configuration to the binning algorithm
model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)

model.fit(trips)

# $m trip features should appear together in one bin
at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")
# parameters passed for testing. A list, where each element of this list takes the form
# [trip part to be sampled within mentioned threshold , clustering way used to check similarity]
parameters= [["o_",'origin'],["_d",'destination'],["od",'origin-destination']]
for tp,cw in parameters:
with self.subTest(trip_part=tp,clustering_way=cw):
shankari marked this conversation as resolved.
Show resolved Hide resolved
#generate random trips using utilities
trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
trip_part=tp, within_thr=m)
#initialise the binning model and fit with previously generated trips
model = etmu.setModelConfig("od_similarity", 500, False, cw, False)
model.fit(trips)
#check the number of trips in each bin
at_least_one_large_bin = any(map(lambda b: len(b['feature_rows']) == m, model.bins.values()))
#Since 5 trips were sampled within the threshold, there should be one bin with 5 trips
self.assertTrue(at_least_one_large_bin, "at least one bin should have at least 5 features in it")

def testPrediction(self):
"""
Expand All @@ -60,22 +81,10 @@ def testPrediction(self):
}

n = 6
trips = etmm.generate_mock_trips(
user_id="joe",
trips=n,
origin=(0, 0),
destination=(1, 1),
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
trips = etmu.setTripConfig(trips=n, org=(0, 0), dest=(1, 1),
trip_part='od', label_data=label_data,
)

model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)
model = etmu.setModelConfig("od_similarity", 500, False, "origin-destination", False)

train = trips[0:5]
test = trips[5]
Expand All @@ -95,33 +104,17 @@ def testNoPrediction(self):
"purpose_confirm": ['pizza_party'],
"replaced_mode": ['crabwalking']
}

n = 5
train = etmm.generate_mock_trips(
user_id="joe",
trips=n,
origin=(39.7645187, -104.9951944), # Denver, CO
destination=(39.7435206, -105.2369292), # Golden, CO
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84

train = etmu.setTripConfig(trips=n, org=(39.7645187, -104.9951944), # Denver, CO
dest=(39.7435206, -105.2369292), # Golden, CO
trip_part='od', label_data=label_data
)
test = etmm.generate_mock_trips(
user_id="joe",
trips=1,
origin=(61.1042262, -150.5611644), # Anchorage, AK
destination=(62.2721466, -150.3233046), # Talkeetna, AK
label_data=label_data,
threshold=0.001, # ~ 111 meters in degrees WGS84
test = etmu.setTripConfig(trips=n, org=(61.1042262, -150.5611644), # Anchorage, AK
dest=(62.2721466, -150.3233046), # Talkeetna, AK
trip_part='od', label_data=label_data,
)

model_config = {
"metric": "od_similarity",
"similarity_threshold_meters": 500, # meters,
"apply_cutoff": False,
"incremental_evaluation": False
}
model = eamtg.GreedySimilarityBinning(model_config)

model = etmu.setModelConfig("od_similarity", 500, False, "origin-destination", False)
shankari marked this conversation as resolved.
Show resolved Hide resolved
model.fit(train)
results, n = model.predict(test[0])

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def setUp(self):
"metric": "od_similarity",
"similarity_threshold_meters": sim_threshold,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": True
}

Expand Down Expand Up @@ -162,6 +163,7 @@ def testIncrementalRun(self):
trips=self.new_trips_per_invocation,
origin=self.origin,
destination=self.destination,
trip_part='od',
label_data=label_data,
threshold=0.0001, # ~10m,
start_ts=time.time() - 20,
Expand Down
3 changes: 3 additions & 0 deletions emission/tests/modellingTests/TestRunGreedyModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def setUp(self):
trips=self.total_trips,
origin=self.origin,
destination=self.destination,
trip_part='od',
label_data=label_data,
within_threshold=self.clustered_trips,
threshold=0.004, # ~400m
Expand Down Expand Up @@ -106,6 +107,7 @@ def testTrainGreedyModelWithZeroTrips(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}

Expand Down Expand Up @@ -142,6 +144,7 @@ def test1RoundTripGreedySimilarityBinning(self):
"metric": "od_similarity",
"similarity_threshold_meters": 500,
"apply_cutoff": False,
"clustering_way": 'origin-destination',
"incremental_evaluation": False
}

Expand Down
Loading