-
Notifications
You must be signed in to change notification settings - Fork 10
/
MemoryBlock.h
267 lines (227 loc) · 6.36 KB
/
MemoryBlock.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
// Copyright 2014-2015 Isis Innovation Limited and the authors of InfiniTAM
#pragma once
#include "PlatformIndependence.h"
#ifndef COMPILE_WITHOUT_CUDA
#include "CUDADefines.h"
#endif
#ifndef __METALC__
#ifdef COMPILE_WITH_METAL
#include "MetalContext.h"
#endif
#include <stdlib.h>
#include <string.h>
#endif
#ifndef MEMORY_DEVICE_TYPE
#define MEMORY_DEVICE_TYPE
/** Selects which of the two buffers of a memory block is meant.
    The definition is skipped when MEMORY_DEVICE_TYPE has already been
    defined before this point.
*/
enum MemoryDeviceType
{
	/** The buffer in CPU (host) memory. */
	MEMORYDEVICE_CPU,
	/** The buffer in CUDA (GPU) memory. */
	MEMORYDEVICE_CUDA
};
#endif
namespace ORUtils
{
/** \brief
	Represents memory blocks, templated on the data type.

	A block holds `dataSize` elements of type T in a CPU (host) buffer,
	in a CUDA (GPU) buffer, or in both. Which buffers exist is recorded
	in the isAllocated_* flags; the host/device transfer helpers copy
	the whole block between the two buffers.

	When compiled with __METALC__ defined, only the data pointers,
	`dataSize` and the GetData() accessors are visible; all allocation
	and transfer code is compiled out.
*/
template <typename T>
class MemoryBlock
{
protected:
#ifndef __METALC__
	/** Which buffers this block currently owns, and whether the CPU
	    buffer was requested as Metal compatible. Free() relies on these
	    flags to pick the matching release routine. */
	bool isAllocated_CPU, isAllocated_CUDA, isMetalCompatible;
#endif
	/** Pointer to memory on CPU host. Set to 0 by the constructors and
	    by Free() while no CPU buffer is allocated. */
	DEVICEPTR(T)* data_cpu;

	/** Pointer to memory on GPU, if available. Set to 0 by the
	    constructors and by Free() while no GPU buffer is allocated. */
	DEVICEPTR(T)* data_cuda;

#ifndef __METALC__
#ifdef COMPILE_WITH_METAL
	/** Opaque buffer handle filled in by allocateMetalData(). */
	void *data_metalBuffer;
#endif
#endif

public:
	/** Source and destination memory spaces for SetFrom(). */
	enum MemoryCopyDirection { CPU_TO_CPU, CPU_TO_CUDA, CUDA_TO_CPU, CUDA_TO_CUDA };

	/** Total number of allocated entries in the data array. */
	size_t dataSize;

	/** Get the data pointer on CPU or GPU.
	    Returns 0 for an unknown @p memoryType. */
	inline DEVICEPTR(T)* GetData(MemoryDeviceType memoryType)
	{
		switch (memoryType)
		{
		case MEMORYDEVICE_CPU: return data_cpu;
		case MEMORYDEVICE_CUDA: return data_cuda;
		}

		return 0;
	}

	/** Get the data pointer on CPU or GPU (read-only variant).
	    Returns 0 for an unknown @p memoryType. */
	inline const DEVICEPTR(T)* GetData(MemoryDeviceType memoryType) const
	{
		switch (memoryType)
		{
		case MEMORYDEVICE_CPU: return data_cpu;
		case MEMORYDEVICE_CUDA: return data_cuda;
		}

		return 0;
	}

#ifndef __METALC__
#ifdef COMPILE_WITH_METAL
	/** Get the opaque Metal buffer handle belonging to the CPU data. */
	inline const void *GetMetalBuffer() const { return data_metalBuffer; }
#endif

	/** Initialize an empty memory block of the given size,
	    on CPU only or GPU only or on both. CPU might also use the
	    Metal compatible allocator (i.e. with 16384 alignment).
	    The allocated buffers are cleared to zero bytes.
	*/
	MemoryBlock(size_t dataSize, bool allocate_CPU, bool allocate_CUDA, bool metalCompatible = true)
	{
		this->isAllocated_CPU = false;
		this->isAllocated_CUDA = false;
		this->isMetalCompatible = false;

		// Start from null pointers so that GetData() never hands out an
		// indeterminate address for a buffer that does not get allocated.
		this->data_cpu = 0;
		this->data_cuda = 0;
#ifdef COMPILE_WITH_METAL
		this->data_metalBuffer = 0;
#endif

		Allocate(dataSize, allocate_CPU, allocate_CUDA, metalCompatible);
		Clear();
	}

	/** Initialize an empty memory block of the given size, either
	    on CPU only or on GPU only. CPU will be Metal compatible if Metal
	    is enabled. The allocated buffer is cleared to zero bytes.
	*/
	MemoryBlock(size_t dataSize, MemoryDeviceType memoryType)
	{
		this->isAllocated_CPU = false;
		this->isAllocated_CUDA = false;
		this->isMetalCompatible = false;

		// Start from null pointers so that GetData() never hands out an
		// indeterminate address for the buffer that is not requested.
		this->data_cpu = 0;
		this->data_cuda = 0;
#ifdef COMPILE_WITH_METAL
		this->data_metalBuffer = 0;
#endif

		switch (memoryType)
		{
		case MEMORYDEVICE_CPU: Allocate(dataSize, true, false, true); break;
		case MEMORYDEVICE_CUDA: Allocate(dataSize, false, true, true); break;
		}

		Clear();
	}

	/** Set every byte of all allocated buffers to the given
	    @p defaultValue. */
	void Clear(unsigned char defaultValue = 0)
	{
		if (isAllocated_CPU) memset(data_cpu, defaultValue, dataSize * sizeof(T));
#ifndef COMPILE_WITHOUT_CUDA
		if (isAllocated_CUDA) ORcudaSafeCall(cudaMemset(data_cuda, defaultValue, dataSize * sizeof(T)));
#endif
	}

	/** Transfer data from CPU to GPU, if possible, i.e. if both buffers
	    are allocated and CUDA support is compiled in. */
	void UpdateDeviceFromHost() const {
#ifndef COMPILE_WITHOUT_CUDA
		if (isAllocated_CUDA && isAllocated_CPU)
			ORcudaSafeCall(cudaMemcpy(data_cuda, data_cpu, dataSize * sizeof(T), cudaMemcpyHostToDevice));
#endif
	}

	/** Transfer data from GPU to CPU, if possible, i.e. if both buffers
	    are allocated and CUDA support is compiled in. */
	void UpdateHostFromDevice() const {
#ifndef COMPILE_WITHOUT_CUDA
		if (isAllocated_CUDA && isAllocated_CPU)
			ORcudaSafeCall(cudaMemcpy(data_cpu, data_cuda, dataSize * sizeof(T), cudaMemcpyDeviceToHost));
#endif
	}

	/** Copy data from @p source into this block.

	    @param source               Block to read from; a null pointer is ignored.
	    @param memoryCopyDirection  Which buffer of @p source is read and
	                                which buffer of this block is written.

	    At most `dataSize` elements of this block are written: if
	    @p source holds more elements, only the leading part that fits is
	    copied. The CUDA directions do nothing when compiled with
	    COMPILE_WITHOUT_CUDA. The buffers named by the direction must
	    be allocated on both blocks.
	*/
	void SetFrom(const MemoryBlock<T> *source, MemoryCopyDirection memoryCopyDirection)
	{
		if (source == 0) return;

		// Never write past the end of the destination buffer.
		const size_t noElements = (source->dataSize < this->dataSize) ? source->dataSize : this->dataSize;
		if (noElements == 0) return;
		const size_t noBytes = noElements * sizeof(T);

		switch (memoryCopyDirection)
		{
		case CPU_TO_CPU:
			memcpy(this->data_cpu, source->data_cpu, noBytes);
			break;
#ifndef COMPILE_WITHOUT_CUDA
		case CPU_TO_CUDA:
			ORcudaSafeCall(cudaMemcpyAsync(this->data_cuda, source->data_cpu, noBytes, cudaMemcpyHostToDevice));
			break;
		case CUDA_TO_CPU:
			ORcudaSafeCall(cudaMemcpy(this->data_cpu, source->data_cuda, noBytes, cudaMemcpyDeviceToHost));
			break;
		case CUDA_TO_CUDA:
			ORcudaSafeCall(cudaMemcpyAsync(this->data_cuda, source->data_cuda, noBytes, cudaMemcpyDeviceToDevice));
			break;
#endif
		default: break;
		}
	}

	/** Releases all buffers owned by the block. */
	virtual ~MemoryBlock() { this->Free(); }

	/** Allocate image data of the specified size. If the
	    data has been allocated before, the data is freed.

	    @param dataSize         Number of elements of type T; 0 allocates nothing.
	    @param allocate_CPU     Request a buffer in host memory.
	    @param allocate_CUDA    Request a buffer in GPU memory (ignored when
	                            compiled with COMPILE_WITHOUT_CUDA).
	    @param metalCompatible  Use the Metal allocator for the host buffer
	                            (only honoured with COMPILE_WITH_METAL).

	    The contents of the new buffers are not initialised here.
	*/
	void Allocate(size_t dataSize, bool allocate_CPU, bool allocate_CUDA, bool metalCompatible)
	{
		Free();

		this->dataSize = dataSize;

		if (dataSize == 0) return;

		if (allocate_CPU)
		{
			// 0: plain new[], 1: cudaMallocHost, 2: Metal allocator.
			// Later assignments win, so Metal takes precedence over CUDA.
			// Free() re-derives the same value from the stored flags.
			int allocType = 0;

#ifndef COMPILE_WITHOUT_CUDA
			if (allocate_CUDA) allocType = 1;
#endif
#ifdef COMPILE_WITH_METAL
			if (metalCompatible) allocType = 2;
#endif
			switch (allocType)
			{
			case 0:
				data_cpu = new T[dataSize];
				break;
			case 1:
#ifndef COMPILE_WITHOUT_CUDA
				ORcudaSafeCall(cudaMallocHost((void**)&data_cpu, dataSize * sizeof(T)));
#endif
				break;
			case 2:
#ifdef COMPILE_WITH_METAL
				allocateMetalData((void**)&data_cpu, (void**)&data_metalBuffer, dataSize * sizeof(T), true);
#endif
				break;
			}

			this->isAllocated_CPU = allocate_CPU;
			this->isMetalCompatible = metalCompatible;
		}

		if (allocate_CUDA)
		{
#ifndef COMPILE_WITHOUT_CUDA
			ORcudaSafeCall(cudaMalloc((void**)&data_cuda, dataSize * sizeof(T)));
			this->isAllocated_CUDA = allocate_CUDA;
#endif
		}
	}

	/** Release every buffer the block currently owns and reset the
	    corresponding pointers and flags. Safe to call repeatedly;
	    `dataSize` is left unchanged. */
	void Free()
	{
		if (isAllocated_CPU)
		{
			// Must mirror the allocator choice made in Allocate(). This
			// has to run before isAllocated_CUDA is cleared below, because
			// that flag tells whether the host buffer came from
			// cudaMallocHost.
			int allocType = 0;

#ifndef COMPILE_WITHOUT_CUDA
			if (isAllocated_CUDA) allocType = 1;
#endif
#ifdef COMPILE_WITH_METAL
			if (isMetalCompatible) allocType = 2;
#endif
			switch (allocType)
			{
			case 0:
				delete[] data_cpu;
				break;
			case 1:
#ifndef COMPILE_WITHOUT_CUDA
				ORcudaSafeCall(cudaFreeHost(data_cpu));
#endif
				break;
			case 2:
#ifdef COMPILE_WITH_METAL
				freeMetalData((void**)&data_cpu, (void**)&data_metalBuffer, dataSize * sizeof(T), true);
#endif
				break;
			}

			// Do not leave dangling pointers behind for GetData().
			data_cpu = 0;
#ifdef COMPILE_WITH_METAL
			data_metalBuffer = 0;
#endif

			isMetalCompatible = false;
			isAllocated_CPU = false;
		}

		if (isAllocated_CUDA)
		{
#ifndef COMPILE_WITHOUT_CUDA
			ORcudaSafeCall(cudaFree(data_cuda));
#endif
			// Do not leave a dangling pointer behind for GetData().
			data_cuda = 0;
			isAllocated_CUDA = false;
		}
	}

	// Suppress the default copy constructor and assignment operator:
	// both are only declared here, without a definition in this header,
	// so the compiler does not generate member-wise copies that would
	// share (and later double-free) the same buffers.
	MemoryBlock(const MemoryBlock&);
	MemoryBlock& operator=(const MemoryBlock&);
#endif
};
}