-
Notifications
You must be signed in to change notification settings - Fork 37
/
datasets.py
150 lines (128 loc) · 5.06 KB
/
datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# coding=utf-8
# Copyright 2021, Duong Nguyen
#
# Licensed under the CECILL-C License;
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.cecill.info
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Customized Pytorch Dataset.
"""
import numpy as np
import os
import pickle
import torch
from torch.utils.data import Dataset, DataLoader
class AISDataset(Dataset):
    """PyTorch dataset of fixed-length, zero-padded AIS trajectories.

    Each item is one trajectory truncated or zero-padded to ``max_seqlen``
    time steps, returned together with a padding mask and some metadata.
    """

    def __init__(self,
                 l_data,
                 max_seqlen=96,
                 dtype=torch.float32,
                 device=torch.device("cpu")):
        """
        Args:
            l_data: list of dictionaries, each element is an AIS trajectory.
                l_data[idx]["mmsi"]: vessel's MMSI.
                l_data[idx]["traj"]: a matrix whose columns are
                    [LAT, LON, SOG, COG, TIMESTAMP]
                lat, lon, sog, and cog are expected to be standardized,
                i.e. range = [0,1).
            max_seqlen: (optional) max sequence length. Default is 96.
            dtype: accepted for interface compatibility but currently
                unused; the returned sequence is always torch.float32.
            device: stored on the instance; not used by __getitem__.
        """
        super().__init__()
        self.max_seqlen = max_seqlen
        self.device = device
        self.l_data = l_data

    def __len__(self):
        """Returns the number of trajectories."""
        return len(self.l_data)

    def __getitem__(self, idx):
        """Gets items.

        Returns:
            seq: float32 Tensor of (max_seqlen, [lat,lon,sog,cog]). Rows past
                the trajectory length are zero padding.
            mask: Tensor of (max_seqlen,). mask[i] = 0.0 if seq[i] is a
                padding, 1.0 otherwise.
            seqlen: sequence length (int tensor), at most max_seqlen.
            mmsi: vessel's MMSI (int tensor).
            time_start: timestamp of the first point of the trajectory
                (int tensor).
        """
        V = self.l_data[idx]
        traj = V["traj"]
        seqlen = min(len(traj), self.max_seqlen)

        # Work on a copy: slicing a NumPy array yields a view, so clamping it
        # in place would silently overwrite the caller's stored trajectory.
        m_v = traj[:seqlen, :4].copy()  # lat, lon, sog, cog
        # Keep every feature strictly below 1.
        m_v[m_v > 0.9999] = 0.9999

        seq = np.zeros((self.max_seqlen, 4))
        seq[:seqlen, :] = m_v
        seq = torch.tensor(seq, dtype=torch.float32)

        mask = torch.zeros(self.max_seqlen)
        mask[:seqlen] = 1.

        seqlen = torch.tensor(seqlen, dtype=torch.int)
        mmsi = torch.tensor(V["mmsi"], dtype=torch.int)
        time_start = torch.tensor(traj[0, 4], dtype=torch.int)

        return seq, mask, seqlen, mmsi, time_start
class AISDataset_grad(Dataset):
    """PyTorch dataset returning positions and their normalized gradients.

    Each item holds, per time step, the position (lat, lon) and the
    step-to-step change of the position (dlat, dlon) rescaled to [0, 1).
    """

    def __init__(self,
                 l_data,
                 dlat_max=0.04,
                 dlon_max=0.04,
                 max_seqlen=96,
                 dtype=torch.float32,
                 device=torch.device("cpu")):
        """
        Args:
            l_data: list of dictionaries, each element is an AIS trajectory.
                l_data[idx]["mmsi"]: vessel's MMSI.
                l_data[idx]["traj"]: a matrix whose columns are
                    [LAT, LON, SOG, COG, TIMESTAMP]
                lat, lon, sog, and cog are expected to be standardized,
                i.e. range = [0,1).
            dlat_max, dlon_max: the maximum value of the gradient of the
                positions.
                dlat_max = max(lat[idx+1]-lat[idx]) for all idx.
            max_seqlen: (optional) max sequence length. Default is 96.
            dtype: accepted for interface compatibility but currently
                unused; the returned sequence is always torch.float32.
            device: stored on the instance; not used by __getitem__.
        """
        super().__init__()
        self.dlat_max = dlat_max
        self.dlon_max = dlon_max
        self.dpos_max = np.array([dlat_max, dlon_max])
        self.max_seqlen = max_seqlen
        self.device = device
        self.l_data = l_data

    def __len__(self):
        """Returns the number of trajectories."""
        return len(self.l_data)

    def __getitem__(self, idx):
        """Gets items.

        Returns:
            seq: float32 Tensor of (max_seqlen, [lat,lon,dlat,dlon]). dlat and
                dlon are the position differences mapped from
                [-dpos_max, dpos_max] to [0, 1): 0.5 means no movement. The
                first step reuses the first available difference. Rows past
                the trajectory length are zero padding.
            mask: Tensor of (max_seqlen,). mask[i] = 0.0 if seq[i] is a
                padding, 1.0 otherwise.
            seqlen: sequence length (int tensor), at most max_seqlen.
            mmsi: vessel's MMSI (int tensor).
            time_start: timestamp of the first point of the trajectory
                (int tensor).
        """
        V = self.l_data[idx]
        traj = V["traj"]
        seqlen = min(len(traj), self.max_seqlen)

        # Only lat/lon feed the output. Copy them: slicing a NumPy array
        # yields a view, so editing it in place would silently overwrite the
        # caller's stored trajectory.
        pos = traj[:, :2].copy()
        # NOTE: only values exactly equal to 1 are replaced; values above 1
        # pass through unchanged.
        pos[pos == 1] = 0.9999

        seq = np.zeros((self.max_seqlen, 4))
        # lat and lon
        seq[:seqlen, :2] = pos[:seqlen]

        # dlat and dlon
        if len(pos) > 1:
            dpos = (pos[1:] - pos[:-1] + self.dpos_max) / (2 * self.dpos_max)
            # The first point has no predecessor; reuse the first difference
            # so that dpos has one row per time step.
            dpos = np.concatenate((dpos[:1, :], dpos), axis=0)
            dpos[dpos >= 1] = 0.9999
            dpos[dpos <= 0] = 0.0
        else:
            # A trajectory with a single point has no difference to compute;
            # encode it as zero displacement (0.5 after rescaling) instead of
            # failing on an empty array.
            dpos = np.full((len(pos), 2), 0.5)
        seq[:seqlen, 2:] = dpos[:seqlen, :2]

        # convert to Tensor
        seq = torch.tensor(seq, dtype=torch.float32)

        mask = torch.zeros(self.max_seqlen)
        mask[:seqlen] = 1.

        seqlen = torch.tensor(seqlen, dtype=torch.int)
        mmsi = torch.tensor(V["mmsi"], dtype=torch.int)
        time_start = torch.tensor(traj[0, 4], dtype=torch.int)

        return seq, mask, seqlen, mmsi, time_start