-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataloader.py
166 lines (133 loc) · 5.55 KB
/
dataloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import torch
from torch.utils.data import Dataset
from pathlib import Path
import numpy as np
import abc
from scipy.spatial.transform import Rotation as R
from PIL import Image
import torchvision
import json
class BaseStickDataset(Dataset, abc.ABC):
    """Base dataset over a single trajectory directory.

    Expects ``traj_path`` to contain ``images/``, ``depths/``, ``confs/`` and a
    ``labels.json`` mapping image names to
    ``{'xyz': [x, y, z], 'rpy': [r, p, y], 'gripper': g}``.
    """

    def __init__(self, traj_path, time_skip, time_offset, time_trim):
        """
        Args:
            traj_path: trajectory directory (str or Path).
            time_skip: keep every ``time_skip``-th frame.
            time_offset: index of the first frame to keep.
            time_trim: number of trailing frames to drop.
        """
        super().__init__()
        self.traj_path = Path(traj_path)
        self.time_skip = time_skip
        self.time_offset = time_offset
        self.time_trim = time_trim
        self.img_pth = self.traj_path / "images"
        self.depth_pth = self.traj_path / "depths"
        self.conf_pth = self.traj_path / "confs"
        self.labels_pth = self.traj_path / "labels.json"
        # Close the file handle after reading (the original leaked it).
        with self.labels_pth.open("r") as f:
            self.labels = json.load(f)
        self.img_keys = sorted(self.labels.keys())
        # label structure: {image_name: {'xyz': [x, y, z], 'rpy': [r, p, y], 'gripper': g}, ...}
        self.labels = np.array(
            [self.flatten_label(self.labels[k]) for k in self.img_keys]
        )
        # Drop the last `time_trim` frames, then subsample: start at
        # `time_offset` and keep every `time_skip`-th frame.
        # BUGFIX: the original used `[:-self.time_trim]`, which yields an
        # EMPTY sequence when time_trim == 0 (`[:-0]` == `[:0]`), silently
        # discarding the whole trajectory. Compute the end index explicitly.
        end = len(self.labels) - self.time_trim
        self.labels = self.labels[:end][self.time_offset :: self.time_skip]
        self.img_keys = self.img_keys[:end][self.time_offset :: self.time_skip]

    def flatten_label(self, label):
        """Flatten one label dict into a (7,) array: [x, y, z, r, p, y, gripper]."""
        xyz = label["xyz"]
        rpy = label["rpy"]
        gripper = label["gripper"]
        return np.concatenate((xyz, rpy, np.array([gripper])))

    def __len__(self):
        return len(self.img_keys)

    def __getitem__(self, idx):
        # Subclasses must implement item access.
        raise NotImplementedError
class StickDataset(BaseStickDataset, abc.ABC):
    """Dataset that converts absolute poses into relative (delta) actions.

    Each label becomes ``[delta_xyz (3), delta_rpy (3), delta_gripper (1)]``,
    where the pose delta is expressed in the frame of the previous *kept* pose.
    """

    def __init__(self, traj_path, time_skip, time_offset, time_trim):
        super().__init__(traj_path, time_skip, time_offset, time_trim)
        self.reformat_labels(self.labels)
        # Action-normalization metrics, injected later via set_act_metrics.
        self.act_metrics = None

    def set_act_metrics(self, act_metrics):
        self.act_metrics = act_metrics

    def _pose_matrix(self, label):
        """Build a 4x4 homogeneous transform from ``[x, y, z, r, p, y, ...]``."""
        m = np.eye(4)
        m[:3, :3] = R.from_euler("xyz", label[3:6], degrees=False).as_matrix()
        m[:3, 3] = label[:3]
        return m

    def reformat_labels(self, labels):
        """Rewrite ``self.labels`` / ``self.img_keys`` as per-step deltas.

        Near-identical consecutive frames (tiny translation, rotation and
        gripper change) are dropped. The reference pose only advances when a
        frame is kept, so small motions accumulate until they exceed the
        thresholds.
        """
        new_labels = []
        new_img_keys = []
        current_label = labels[0] if len(labels) else None
        for i in range(len(labels) - 1):
            next_label = labels[i + 1]
            current_matrix = self._pose_matrix(current_label)
            next_matrix = self._pose_matrix(next_label)
            # Delta expressed in the current frame: T_delta = T_cur^-1 @ T_next.
            delta_matrix = np.linalg.inv(current_matrix) @ next_matrix
            delta_xyz = delta_matrix[:3, 3]
            delta_r = R.from_matrix(delta_matrix[:3, :3])
            delta_rpy = delta_r.as_euler("xyz", degrees=False)
            del_gripper = current_label[6] - next_label[6]
            xyz_norm = np.linalg.norm(delta_xyz)
            rpy_norm = np.linalg.norm(delta_r.as_rotvec())
            if xyz_norm < 0.01 and rpy_norm < 0.008 and abs(del_gripper) < 0.05:
                # Delta too small (basically the same image): skip it WITHOUT
                # advancing current_label, so the motion accumulates.
                continue
            new_labels.append(
                np.concatenate((delta_xyz, delta_rpy, np.array([del_gripper])))
            )
            new_img_keys.append(self.img_keys[i])
            current_label = next_label
        # BUGFIX: the original pre-allocated a zero array and then filtered
        # rows with `sum(axis=1) != 0`, which could wrongly drop a *kept*
        # delta whose components happen to cancel to zero — and then trip
        # the length assert below. Appending only kept rows avoids that.
        new_labels = np.asarray(new_labels, dtype=float).reshape(-1, 7)
        assert len(new_labels) == len(new_img_keys)
        self.labels = new_labels
        self.img_keys = new_img_keys

    def load_labels(self, idx):
        # The whole (filtered) trajectory is returned as one sample.
        labels = self.labels
        return labels

    def __len__(self):
        # One trajectory == one dataset sample.
        return 1

    def __getitem__(self, idx):
        if idx < 0 or idx >= len(self):
            raise IndexError()
        # No image payload at this level; subclasses fill in the first slot.
        return None, self.load_labels(idx)
class ImageStickDataset(StickDataset):
    """StickDataset variant that also loads the RGB image for every kept key."""

    def __init__(
        self,
        traj_path,
        time_skip,
        time_offset,
        time_trim,
        img_size,
        pre_load=False,
        transforms=None,
    ):
        """
        Args:
            img_size: target size for ``torchvision.transforms.Resize``;
                ``None`` keeps the native resolution.
            pre_load: stored but unused here — presumably a hook for caching
                images in memory; TODO confirm against callers.
            transforms: optional callable applied to the stacked image tensor.
        """
        super().__init__(traj_path, time_skip, time_offset, time_trim)
        self.img_size = img_size
        self.pre_load = pre_load
        self.transforms = transforms
        # Resize (if requested) happens on the PIL image, before ToTensor.
        preprocess_transforms = [torchvision.transforms.ToTensor()]
        if img_size is not None:
            preprocess_transforms = [
                torchvision.transforms.Resize(img_size)
            ] + preprocess_transforms
        self.preprocess_img_transforms = torchvision.transforms.Compose(
            preprocess_transforms
        )

    def __getitem__(self, idx):
        _, labels = super().__getitem__(idx)
        imgs = []
        for key in self.img_keys:
            # BUGFIX: close each image file after preprocessing; a bare
            # Image.open leaked one file descriptor per frame.
            with Image.open(str(self.img_pth / key)) as img:
                tensor = self.preprocess_img_transforms(img)
            # ToTensor yields (C, H, W); move channels last -> (H, W, C).
            imgs.append(torch.moveaxis(tensor, 0, -1))
        # Stack along a new leading time axis -> (T, H, W, C).
        imgs = torch.stack(imgs, dim=0)
        if self.transforms:
            imgs = self.transforms(imgs)
        return imgs, labels