-
Notifications
You must be signed in to change notification settings - Fork 4
/
mjbatchrenderer.pyx
301 lines (244 loc) · 10.7 KB
/
mjbatchrenderer.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
try:
import pycuda.driver as drv
except ImportError:
drv = None
class MjBatchRendererException(Exception):
    """Base class for all errors raised by MjBatchRenderer."""
class MjBatchRendererNotSupported(MjBatchRendererException):
    """Raised when the current rendering backend cannot support batch rendering."""
class CudaNotEnabledError(MjBatchRendererException):
    """Raised when a CUDA-only operation is used without use_cuda=True."""
class CudaBufferNotMappedError(MjBatchRendererException):
    """Raised when a CUDA buffer is accessed before calling map()."""
class CudaBufferMappedError(MjBatchRendererException):
    """Raised when an operation requires the CUDA buffers to be unmapped first."""
class MjBatchRenderer(object):
    """
    Utility class for rendering into OpenGL Pixel Buffer Objects (PBOs),
    which allows for accessing multiple rendered images in batch.

    If used with CUDA (i.e. initialized with use_cuda=True), you need
    to call map/unmap when accessing CUDA buffer pointer. This is to
    ensure that all OpenGL instructions have completed:

        renderer = MjBatchRenderer(100, 100, use_cuda=True)
        renderer.render(sim)
        renderer.map()
        image = renderer.read()
        renderer.unmap()
    """

    def __init__(self, width, height, batch_size=1, device_id=0,
                 depth=False, use_cuda=False):
        """
        Args:
        - width (int): Image width.
        - height (int): Image height.
        - batch_size (int): Size of batch to render into. Memory is
            allocated once upon initialization of object.
        - device_id (int): Device to use for storing the batch.
        - depth (bool): if True, render depth in addition to RGB.
        - use_cuda (bool): if True, use OpenGL-CUDA interop to map
            the PBO onto a CUDA buffer.

        Raises:
        - MjBatchRendererNotSupported: if the rendering context is not
            EGL-backed.
        - ImportError: if use_cuda=True but pycuda is not installed.
        """
        # Early initialization to prevent failure in __del__ if any of
        # the checks below raise before construction completes.
        self._use_cuda = False
        # BUG FIX: the original wrote "self.pbo_depth, self.pbo_depth = 0, 0",
        # assigning the same attribute twice and leaving self.pbo_rgb
        # undefined — __del__ would then raise AttributeError on a
        # partially-constructed instance, defeating the purpose of this line.
        self.pbo_rgb, self.pbo_depth = 0, 0

        if not usingEGL():
            # BUG FIX: the original implicit string concatenation produced
            # "EGL-backedrendering context." (missing space).
            raise MjBatchRendererNotSupported(
                "MjBatchRenderer currently only supported with EGL-backed "
                "rendering context.")

        # Make sure OpenGL Context is available before creating PBOs
        initOpenGL(device_id)
        makeOpenGLContextCurrent(device_id)

        self.pbo_rgb = createPBO(width, height, batch_size, 0)
        self.pbo_depth = createPBO(width, height, batch_size, 1) if depth else 0

        self._depth = depth
        self._device_id = device_id
        self._width = width
        self._height = height
        self._batch_size = batch_size
        self._current_batch_offset = 0

        self._use_cuda = use_cuda
        self._cuda_buffers_are_mapped = False
        self._cuda_rgb_ptr, self._cuda_depth_ptr = None, None
        if use_cuda:
            self._init_cuda()

    def _init_cuda(self):
        """ Create a CUDA context and register the PBOs with it. """
        if drv is None:
            raise ImportError("Failed to import pycuda.")
        # Use local imports so that we don't have to make pycuda
        # opengl interop a requirement
        from pycuda.gl import RegisteredBuffer

        drv.init()
        device = drv.Device(self._device_id)
        self._cuda_context = device.make_context()
        self._cuda_context.push()
        self._cuda_rgb_pbo = RegisteredBuffer(self.pbo_rgb)
        if self._depth:
            self._cuda_depth_pbo = RegisteredBuffer(self.pbo_depth)

    def map(self):
        """ Map OpenGL buffer to CUDA for reading. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif self._cuda_buffers_are_mapped:
            return  # just make it a no-op
        self._cuda_context.push()
        self._cuda_rgb_mapping = self._cuda_rgb_pbo.map()
        ptr, self._cuda_rgb_buf_size = (
            self._cuda_rgb_mapping.device_ptr_and_size())
        assert ptr is not None and self._cuda_rgb_buf_size > 0
        if self._cuda_rgb_ptr is None:
            self._cuda_rgb_ptr = ptr
        # There doesn't seem to be a guarantee from the API that the
        # pointer will be the same between mappings, but empirically
        # this has been true. If this isn't true, we need to modify
        # the interface to MjBatchRenderer to make this clearer to user.
        # So, hopefully we won't hit this assert.
        assert self._cuda_rgb_ptr == ptr, (
            "Mapped CUDA rgb buffer pointer %d doesn't match old pointer %d" %
            (ptr, self._cuda_rgb_ptr))

        if self._depth:
            self._cuda_depth_mapping = self._cuda_depth_pbo.map()
            ptr, self._cuda_depth_buf_size = (
                self._cuda_depth_mapping.device_ptr_and_size())
            assert ptr is not None and self._cuda_depth_buf_size > 0
            if self._cuda_depth_ptr is None:
                self._cuda_depth_ptr = ptr
            assert self._cuda_depth_ptr == ptr, (
                "Mapped CUDA depth buffer pointer %d doesn't match old pointer %d" %
                (ptr, self._cuda_depth_ptr))

        self._cuda_buffers_are_mapped = True

    def unmap(self):
        """ Unmap OpenGL buffer from CUDA so that it can be rendered into. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif not self._cuda_buffers_are_mapped:
            return  # just make it a no-op
        self._cuda_context.push()
        self._cuda_rgb_mapping.unmap()
        self._cuda_rgb_mapping = None
        self._cuda_rgb_ptr = None
        if self._depth:
            self._cuda_depth_mapping.unmap()
            self._cuda_depth_mapping = None
            self._cuda_depth_ptr = None
        self._cuda_buffers_are_mapped = False

    def prepare_render_context(self, sim):
        """
        Set up the rendering context for an MjSim. Also happens automatically
        on `.render()`.

        Returns an existing offscreen context bound to this renderer's
        device if the sim already has one; otherwise creates a new one.
        """
        for c in sim.render_contexts:
            if (c.offscreen and
                    isinstance(c.opengl_context, OffscreenOpenGLContext) and
                    c.opengl_context.device_id == self._device_id):
                return c
        return MjRenderContext(sim, device_id=self._device_id)

    def render(self, sim, camera_id=None, batch_offset=None):
        """
        Render current scene from the MjSim into the buffer. By
        default the batch offset is automatically incremented with
        each call. It can be reset with the batch_offset parameter.

        This method doesn't return anything. Use the `.read` method
        to read the buffer, or access the buffer pointer directly with
        e.g. `.cuda_rgb_buffer_pointer` accessor.

        Args:
        - sim (MjSim): The simulator to use for rendering.
        - camera_id (int): MuJoCo id for the camera, from
            `sim.model.camera_name2id()`.
        - batch_offset (int): offset in batch to render to.

        Raises:
        - CudaBufferMappedError: if the CUDA buffers are still mapped.
        - ValueError: if batch_offset is out of range.
        """
        if self._use_cuda and self._cuda_buffers_are_mapped:
            raise CudaBufferMappedError(
                "CUDA buffers must be unmapped before calling render.")
        if batch_offset is not None:
            if batch_offset < 0 or batch_offset >= self._batch_size:
                raise ValueError("batch_offset out of range")
            self._current_batch_offset = batch_offset

        # Ensure the correct device context is used (this takes ~1 µs)
        makeOpenGLContextCurrent(self._device_id)

        render_context = self.prepare_render_context(sim)
        render_context.update_offscreen_size(self._width, self._height)
        render_context.render(self._width, self._height, camera_id=camera_id)

        cdef mjrRect viewport
        viewport.left = 0
        viewport.bottom = 0
        viewport.width = self._width
        viewport.height = self._height

        cdef PyMjrContext con = <PyMjrContext> render_context.con
        copyFBOToPBO(con.ptr, self.pbo_rgb, self.pbo_depth,
                     viewport, self._current_batch_offset)

        # Auto-advance (wrapping) so successive calls fill the batch.
        self._current_batch_offset = (self._current_batch_offset + 1) % self._batch_size

    def read(self):
        """
        Transfer a copy of the buffer from the GPU to the CPU as a numpy array.

        Returns:
        - rgb_batch (numpy array): batch of rgb images in uint8 NHWC format
        - depth_batch (numpy array): batch of depth images in uint16 NHWC format
        """
        if self._use_cuda:
            return self._read_cuda()
        else:
            return self._read_nocuda()

    def _read_cuda(self):
        """ Read the batch through the mapped CUDA device pointers. """
        if not self._cuda_buffers_are_mapped:
            raise CudaBufferNotMappedError(
                "CUDA buffers must be mapped before reading")
        rgb_arr = drv.from_device(
            self._cuda_rgb_ptr,
            shape=(self._batch_size, self._height, self._width, 3),
            dtype=np.uint8)
        if self._depth:
            depth_arr = drv.from_device(
                self._cuda_depth_ptr,
                shape=(self._batch_size, self._height, self._width),
                dtype=np.uint16)
        else:
            depth_arr = None

        return rgb_arr, depth_arr

    def _read_nocuda(self):
        """ Read the batch straight from the PBOs via OpenGL. """
        rgb_arr = np.zeros(3 * self._width * self._height * self._batch_size, dtype=np.uint8)
        cdef unsigned char[::view.contiguous] rgb_view = rgb_arr
        depth_arr = np.zeros(self._width * self._height * self._batch_size, dtype=np.uint16)
        cdef unsigned short[::view.contiguous] depth_view = depth_arr

        if self._depth:
            readPBO(&rgb_view[0], &depth_view[0], self.pbo_rgb, self.pbo_depth,
                    self._width, self._height, self._batch_size)
            depth_arr = depth_arr.reshape(self._batch_size, self._height, self._width)
        else:
            readPBO(&rgb_view[0], NULL, self.pbo_rgb, 0,
                    self._width, self._height, self._batch_size)
            # Fine to throw away depth_arr above since malloc/free is cheap
            depth_arr = None

        rgb_arr = rgb_arr.reshape(self._batch_size, self._height, self._width, 3)
        return rgb_arr, depth_arr

    @property
    def cuda_rgb_buffer_pointer(self):
        """ Pointer to CUDA buffer for RGB batch. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif not self._cuda_buffers_are_mapped:
            raise CudaBufferNotMappedError()
        return self._cuda_rgb_ptr

    @property
    def cuda_depth_buffer_pointer(self):
        """ Pointer to CUDA buffer for depth batch. """
        if not self._use_cuda:
            raise CudaNotEnabledError()
        elif not self._cuda_buffers_are_mapped:
            raise CudaBufferNotMappedError()
        if not self._depth:
            raise RuntimeError("Depth not enabled. Use depth=True on initialization.")
        return self._cuda_depth_ptr

    def __del__(self):
        if self._use_cuda:
            self._cuda_context.push()
            self.unmap()
            self._cuda_rgb_pbo.unregister()
            if self._depth:
                self._cuda_depth_pbo.unregister()

            # Clean up context
            drv.Context.pop()
            self._cuda_context.detach()

        # BUG FIX: the original guarded freePBO(self.pbo_rgb) with
        # "if self.pbo_depth:", so the RGB PBO leaked whenever
        # depth=False (pbo_depth == 0).
        if self.pbo_rgb:
            freePBO(self.pbo_rgb)
        if self.pbo_depth:
            freePBO(self.pbo_depth)