forked from cms-patatrack/pixeltrack-standalone
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HeterogeneousSoA.h
187 lines (144 loc) · 5.53 KB
/
HeterogeneousSoA.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#ifndef CUDADataFormatsCommonHeterogeneousSoA_H
#define CUDADataFormatsCommonHeterogeneousSoA_H
#include <cassert>
#include "CUDACore/copyAsync.h"
#include "CUDACore/cudaCheck.h"
#include "CUDACore/device_unique_ptr.h"
#include "CUDACore/host_unique_ptr.h"
// a heterogeneous unique pointer...
template <typename T>
class HeterogeneousSoA {
public:
using Product = T;
HeterogeneousSoA() = default; // make root happy
~HeterogeneousSoA() = default;
HeterogeneousSoA(HeterogeneousSoA &&) = default;
HeterogeneousSoA &operator=(HeterogeneousSoA &&) = default;
explicit HeterogeneousSoA(cms::cuda::device::unique_ptr<T> &&p) : dm_ptr(std::move(p)) {}
explicit HeterogeneousSoA(cms::cuda::host::unique_ptr<T> &&p) : hm_ptr(std::move(p)) {}
explicit HeterogeneousSoA(std::unique_ptr<T> &&p) : std_ptr(std::move(p)) {}
auto const *get() const { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
auto const &operator*() const { return *get(); }
auto const *operator-> () const { return get(); }
auto *get() { return dm_ptr ? dm_ptr.get() : (hm_ptr ? hm_ptr.get() : std_ptr.get()); }
auto &operator*() { return *get(); }
auto *operator-> () { return get(); }
// in reality valid only for GPU version...
cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const {
assert(dm_ptr);
auto ret = cms::cuda::make_host_unique<T>(stream);
cudaCheck(cudaMemcpyAsync(ret.get(), dm_ptr.get(), sizeof(T), cudaMemcpyDefault, stream));
return ret;
}
private:
// a union wan't do it, a variant will not be more efficienct
cms::cuda::device::unique_ptr<T> dm_ptr; //!
cms::cuda::host::unique_ptr<T> hm_ptr; //!
std::unique_ptr<T> std_ptr; //!
};
namespace cudaCompat {
// Traits for the GPU flavour: the generic unique_ptr / make_unique names map onto the
// cms::cuda device smart pointer and its factory.
struct GPUTraits {
  template <typename T>
  using unique_ptr = cms::cuda::device::unique_ptr<T>;

  // Explicit device allocation of a T on `stream`.
  template <typename T>
  static auto make_device_unique(cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(stream);
  }

  // Explicit device allocation; `size` is handed unchanged to cms::cuda::make_device_unique
  // (presumably an element count for array T -- confirm against that helper).
  template <typename T>
  static auto make_device_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(size, stream);
  }

  // Explicit host allocation of a T on `stream`.
  template <typename T>
  static auto make_host_unique(cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(stream);
  }

  // Generic factories: for these traits they are the device factories under another name.
  template <typename T>
  static auto make_unique(cudaStream_t stream) {
    return make_device_unique<T>(stream);
  }

  template <typename T>
  static auto make_unique(size_t size, cudaStream_t stream) {
    return make_device_unique<T>(size, stream);
  }
};
// Traits for the host flavour: the generic unique_ptr / make_unique names map onto the
// cms::cuda host smart pointer and its factory. Explicit host and device factories are
// offered as well, with the same names as in GPUTraits and CPUTraits.
struct HostTraits {
  template <typename T>
  using unique_ptr = cms::cuda::host::unique_ptr<T>;

  // Generic factory: allocates a T through cms::cuda::make_host_unique on `stream`.
  template <typename T>
  static auto make_unique(cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(stream);
  }

  // Sized generic factory. GPUTraits and CPUTraits both provide this overload; without it,
  // code written against Traits::make_unique<T>(size, stream) cannot be instantiated with
  // HostTraits. `size` is handed unchanged to cms::cuda::make_host_unique.
  template <typename T>
  static auto make_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(size, stream);
  }

  // Explicit host allocation of a T on `stream`.
  template <typename T>
  static auto make_host_unique(cudaStream_t stream) {
    return cms::cuda::make_host_unique<T>(stream);
  }

  // Explicit device allocation of a T on `stream`.
  template <typename T>
  static auto make_device_unique(cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(stream);
  }

  // Explicit sized device allocation; `size` is handed unchanged to
  // cms::cuda::make_device_unique.
  template <typename T>
  static auto make_device_unique(size_t size, cudaStream_t stream) {
    return cms::cuda::make_device_unique<T>(size, stream);
  }
};
// Traits for the plain-CPU flavour: every factory ends up in std::make_unique and the
// cudaStream_t argument is accepted only to keep the signatures parallel to the other
// traits; it never influences the allocation.
struct CPUTraits {
  template <typename T>
  using unique_ptr = std::unique_ptr<T>;

  // Generic factory: std::make_unique<T>() with the stream ignored.
  template <typename T>
  static auto make_unique(cudaStream_t) {
    return std::make_unique<T>();
  }

  // Sized generic factory: std::make_unique<T>(size) with the stream ignored.
  template <typename T>
  static auto make_unique(size_t size, cudaStream_t) {
    return std::make_unique<T>(size);
  }

  // "Host" and "device" are the same thing here, so all of these defer to make_unique.
  template <typename T>
  static auto make_host_unique(cudaStream_t stream) {
    return make_unique<T>(stream);
  }

  template <typename T>
  static auto make_device_unique(cudaStream_t stream) {
    return make_unique<T>(stream);
  }

  template <typename T>
  static auto make_device_unique(size_t size, cudaStream_t stream) {
    return make_unique<T>(size, stream);
  }
};
} // namespace cudaCompat
// A heterogeneous unique pointer of a different sort: a move-only owning handle to a single T
// whose smart-pointer type and allocation routine are chosen at compile time through Traits
// (HeterogeneousSoA, by contrast, carries three pointers and picks one at run time).
template <typename T, typename Traits>
class HeterogeneousSoAImpl {
public:
  // Smart-pointer template provided by the traits, e.g. cudaCompat::GPUTraits::unique_ptr.
  template <class V>
  using unique_ptr = typename Traits::template unique_ptr<V>;

  // Defaulted special members. Declaring the move operations leaves the class non-copyable.
  HeterogeneousSoAImpl() = default;  // make root happy
  ~HeterogeneousSoAImpl() = default;
  HeterogeneousSoAImpl(HeterogeneousSoAImpl &&) = default;
  HeterogeneousSoAImpl &operator=(HeterogeneousSoAImpl &&) = default;

  // Adopt an allocation that already exists.
  explicit HeterogeneousSoAImpl(unique_ptr<T> &&p) : m_ptr{std::move(p)} {}

  // Create the allocation through the traits; defined after the class.
  explicit HeterogeneousSoAImpl(cudaStream_t stream);

  // Raw access to the payload; a null pointer when nothing is owned.
  const T *get() const { return m_ptr.get(); }
  T *get() { return m_ptr.get(); }

  // Copy of the payload into a freshly allocated host buffer; defined after the class.
  cms::cuda::host::unique_ptr<T> toHostAsync(cudaStream_t stream) const;

private:
  // NOTE(review): the trailing "//!" looks like ROOT's transient-member marker -- keep it verbatim.
  unique_ptr<T> m_ptr; //!
};
// Stream constructor: the owned pointer is whatever Traits::make_unique<T>(stream) returns.
template <typename T, typename Traits>
HeterogeneousSoAImpl<T, Traits>::HeterogeneousSoAImpl(cudaStream_t stream)
    : m_ptr(Traits::template make_unique<T>(stream)) {}
// Allocates a T through cms::cuda::make_host_unique on `stream`, enqueues a cudaMemcpyAsync
// of sizeof(T) bytes from the owned object into it, and returns the new host pointer.
// No synchronization is performed here and the owned pointer is not checked for null.
// In reality valid only for the GPU version.
template <typename T, typename Traits>
cms::cuda::host::unique_ptr<T> HeterogeneousSoAImpl<T, Traits>::toHostAsync(cudaStream_t stream) const {
  auto hostCopy = cms::cuda::make_host_unique<T>(stream);
  auto *const destination = hostCopy.get();
  T const *const source = get();
  cudaCheck(cudaMemcpyAsync(destination, source, sizeof(T), cudaMemcpyDefault, stream));
  return hostCopy;
}
// Convenience aliases that bind HeterogeneousSoAImpl to one of the cudaCompat traits structs.

// GPU flavour: storage is a cms::cuda::device::unique_ptr<T> (see cudaCompat::GPUTraits).
template <typename T>
using HeterogeneousSoAGPU = HeterogeneousSoAImpl<T, cudaCompat::GPUTraits>;
// CPU flavour: storage is a std::unique_ptr<T> (see cudaCompat::CPUTraits).
template <typename T>
using HeterogeneousSoACPU = HeterogeneousSoAImpl<T, cudaCompat::CPUTraits>;
// Host flavour: storage is a cms::cuda::host::unique_ptr<T> (see cudaCompat::HostTraits).
template <typename T>
using HeterogeneousSoAHost = HeterogeneousSoAImpl<T, cudaCompat::HostTraits>;
#endif