Skip to content

Commit

Permalink
[Tested] Pandas Improvements for Dwell_Segmentation_time_filter
Browse files Browse the repository at this point in the history
  • Loading branch information
humbleOldSage committed Jan 22, 2024
1 parent 1d1b31f commit 650d4d8
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import emission.analysis.point_features as pf
import emission.analysis.intake.segmentation.trip_segmentation as eaist
import emission.core.wrapper.location as ecwl

import emission.core.common as ec
import emission.analysis.intake.segmentation.restart_checking as eaisr

class DwellSegmentationTimeFilter(eaist.TripSegmentationMethod):
Expand Down Expand Up @@ -109,17 +109,20 @@ def segment_into_trips(self, filtered_points_pre_ts_diff_df,transition_df,timese
# We are going to use the last 8 points for now.
# TODO: Change this back to last 10 points once we normalize phone and this
last10Points_df = filtered_points_df.iloc[max(idx-self.point_threshold, curr_trip_start_point.idx):idx+1]
distanceToLast = lambda row: pf.calDistance(ad.AttrDict(row), currPoint)
timeToLast = lambda row: currPoint.ts - ad.AttrDict(row).ts
last5MinsDistances = last5MinsPoints_df.apply(distanceToLast, axis=1)
logging.debug("last5MinsDistances = %s with length %d" % (last5MinsDistances.to_numpy(), len(last5MinsDistances)))
last10PointsDistances = last10Points_df.apply(distanceToLast, axis=1)
logging.debug("last10PointsDistances = %s with length %d, shape %s" % (last10PointsDistances.to_numpy(),
len(last10PointsDistances),
last10PointsDistances.shape))

# get 2d numpy array, from df
last10Points_coords=last10Points_df[['longitude','latitude']].to_numpy()
# create a numpy array of the current point's coordinates with the same dimensions
currPoint_coords = np.repeat(np.array([[currPoint.longitude,currPoint.latitude]]),len(last10Points_df),axis=0)
#compute distance
last10PointsDistances=ec.calDistance(last10Points_coords,currPoint_coords)
# Rebuild the current-coordinates numpy array to match the dimensions of the last-5-minutes points array
currPoint_coords = np.repeat(np.array([[currPoint.longitude,currPoint.latitude]]),len(last5MinsPoints_df),axis=0)
# get 2d numpy array, from df
last5MinsPoints_coords=last5MinsPoints_df[['longitude','latitude']].to_numpy()
# calculate distance
last5MinsDistances=ec.calDistance(last5MinsPoints_coords,currPoint_coords)
# Fix for https://github.com/e-mission/e-mission-server/issues/348
last5MinTimes = last5MinsPoints_df.apply(timeToLast, axis=1)
last5MinTimes = currPoint.ts-last5MinsPoints_df.ts

logging.debug("len(last10PointsDistances) = %d, len(last5MinsDistances) = %d" %
(len(last10PointsDistances), len(last5MinsDistances)))
Expand Down
16 changes: 15 additions & 1 deletion emission/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from dateutil import parser
from pytz import timezone
import math
import numpy as np

def isMillisecs(ts):
    """Return True unless ``ts`` compares as strictly less than 10 ** 11.

    NOTE(review): judging by the name, this presumably tells epoch
    timestamps expressed in milliseconds apart from ones expressed in
    seconds -- confirm against the callers.
    """
    threshold = 10 ** 11
    # Keep the negated "<" rather than ">=" so that values for which the
    # comparison is False (e.g. NaN) are still reported as True.
    below_threshold = ts < threshold
    return not below_threshold
Expand Down Expand Up @@ -51,7 +52,20 @@ def calDistance(point1, point2, coordinates=False):
# SHANKARI: Why do we have two calDistance() functions?
# Need to combine into one
# points are now in geojson format (lng,lat)
if coordinates:

# Added to support vectorization when both points are numpy arrays
if isinstance(point1,np.ndarray) and isinstance(point2,np.ndarray):
dLat = np.radians(point1[:,1]-point2[:,1])
dLon = np.radians(point1[:,0]-point2[:,0])
lat1 = np.radians(point1[:,1])
lat2 = np.radians(point2[:,1])

a = (np.sin(dLat/2) ** 2) + ((np.sin(dLon/2) ** 2) * np.cos(lat1) * np.cos(lat2))
c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1-a))
d = earthRadius * c

return d
elif coordinates:
dLat = math.radians(point1.lat-point2.lat)
dLon = math.radians(point1.lon-point2.lon)
lat1 = math.radians(point1.lat)
Expand Down

0 comments on commit 650d4d8

Please sign in to comment.