diff --git a/emission/analysis/intake/segmentation/trip_segmentation_methods/dwell_segmentation_time_filter.py b/emission/analysis/intake/segmentation/trip_segmentation_methods/dwell_segmentation_time_filter.py index e53139e1c..34779f0dd 100644 --- a/emission/analysis/intake/segmentation/trip_segmentation_methods/dwell_segmentation_time_filter.py +++ b/emission/analysis/intake/segmentation/trip_segmentation_methods/dwell_segmentation_time_filter.py @@ -20,7 +20,7 @@ import emission.analysis.point_features as pf import emission.analysis.intake.segmentation.trip_segmentation as eaist import emission.core.wrapper.location as ecwl - +import emission.core.common as ec import emission.analysis.intake.segmentation.restart_checking as eaisr class DwellSegmentationTimeFilter(eaist.TripSegmentationMethod): @@ -109,17 +109,20 @@ def segment_into_trips(self, filtered_points_pre_ts_diff_df,transition_df,timese # We are going to use the last 8 points for now. # TODO: Change this back to last 10 points once we normalize phone and this last10Points_df = filtered_points_df.iloc[max(idx-self.point_threshold, curr_trip_start_point.idx):idx+1] - distanceToLast = lambda row: pf.calDistance(ad.AttrDict(row), currPoint) - timeToLast = lambda row: currPoint.ts - ad.AttrDict(row).ts - last5MinsDistances = last5MinsPoints_df.apply(distanceToLast, axis=1) - logging.debug("last5MinsDistances = %s with length %d" % (last5MinsDistances.to_numpy(), len(last5MinsDistances))) - last10PointsDistances = last10Points_df.apply(distanceToLast, axis=1) - logging.debug("last10PointsDistances = %s with length %d, shape %s" % (last10PointsDistances.to_numpy(), - len(last10PointsDistances), - last10PointsDistances.shape)) - + # get 2d numpy array, from df + last10Points_coords=last10Points_df[['longitude','latitude']].to_numpy() + # create a similar dimension current coordinates numpy array + currPoint_coords = np.repeat(np.array([[currPoint.longitude,currPoint.latitude]]),len(last10Points_df),axis=0) + 
#compute distance + last10PointsDistances=ec.calDistance(last10Points_coords,currPoint_coords) + # Reset current coordinates numpy array as per last 5 mins Points array's dimensions + currPoint_coords = np.repeat(np.array([[currPoint.longitude,currPoint.latitude]]),len(last5MinsPoints_df),axis=0) + # get 2d numpy array, from df + last5MinsPoints_coords=last5MinsPoints_df[['longitude','latitude']].to_numpy() + # calculate distance + last5MinsDistances=ec.calDistance(last5MinsPoints_coords,currPoint_coords) # Fix for https://github.com/e-mission/e-mission-server/issues/348 - last5MinTimes = last5MinsPoints_df.apply(timeToLast, axis=1) + last5MinTimes = currPoint.ts-last5MinsPoints_df.ts logging.debug("len(last10PointsDistances) = %d, len(last5MinsDistances) = %d" % (len(last10PointsDistances), len(last5MinsDistances))) diff --git a/emission/core/common.py b/emission/core/common.py index 4d97ce681..6a37d61ba 100644 --- a/emission/core/common.py +++ b/emission/core/common.py @@ -14,6 +14,7 @@ from dateutil import parser from pytz import timezone import math +import numpy as np def isMillisecs(ts): return not (ts < 10 ** 11) @@ -51,7 +52,20 @@ def calDistance(point1, point2, coordinates=False): # SHANKARI: Why do we have two calDistance() functions? # Need to combine into one # points are now in geojson format (lng,lat) - if coordinates: + + #Added to Support vectorization when dealing with numpy array + if isinstance(point1,np.ndarray) and isinstance(point2,np.ndarray): + dLat = np.radians(point1[:,1]-point2[:,1]) + dLon = np.radians(point1[:,0]-point2[:,0]) + lat1 = np.radians(point1[:,1]) + lat2 = np.radians(point2[:,1]) + + a = (np.sin(dLat/2) ** 2) + ((np.sin(dLon/2) ** 2) * np.cos(lat1) * np.cos(lat2)) + c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a)) + d = earthRadius * c + + return d + elif coordinates: dLat = math.radians(point1.lat-point2.lat) dLon = math.radians(point1.lon-point2.lon) lat1 = math.radians(point1.lat)