forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
profiler_cuda.cpp
65 lines (58 loc) · 1.65 KB
/
profiler_cuda.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#include <torch/csrc/autograd/profiler.h>
#include <torch/csrc/cuda/cuda_check.h>
#include <c10/cuda/CUDAGuard.h>
#include <nvToolsExt.h>
#include <sstream>
namespace torch { namespace autograd { namespace profiler {
namespace {
// CUDA-backed implementation of the profiler's CUDAStubs interface.
// An instance is installed via registerCUDAMethods() at static-init time,
// giving the (CUDA-agnostic) autograd profiler access to CUDA events/NVTX.
struct CUDAMethods : public CUDAStubs {
  // Records a CUDA event on the current stream and captures the current
  // device plus a CPU timestamp. The CPU time is read via getTime()
  // (nanoseconds, per the out-parameter name) immediately before the event
  // record so the two clocks are sampled as close together as possible.
  void record(int* device, CUDAEventStub* event, int64_t* cpu_ns) override {
    TORCH_CUDA_CHECK(cudaGetDevice(device));
    TORCH_CUDA_CHECK(cudaEventCreate(event));
    auto stream = at::cuda::getCurrentCUDAStream();
    *cpu_ns = getTime();
    TORCH_CUDA_CHECK(cudaEventRecord(*event, stream));
  }

  // Returns the time between two recorded events in MICROSECONDS.
  // Blocks the host until both events have completed on the device.
  float elapsed(CUDAEventStub event, CUDAEventStub event2) override {
    TORCH_CUDA_CHECK(cudaEventSynchronize(event));
    TORCH_CUDA_CHECK(cudaEventSynchronize(event2));
    float ms;
    TORCH_CUDA_CHECK(cudaEventElapsedTime(&ms, event, event2));
    // cudaEventElapsedTime reports milliseconds; callers expect microseconds.
    return ms * 1000.0;
  }

  // Thin NVTX wrappers: emit instantaneous markers / push-pop ranges that
  // show up in nvprof / Nsight timelines.
  void nvtxMarkA(const char* name) override {
    ::nvtxMark(name);
  }
  void nvtxRangePushA(const char* name) override {
    ::nvtxRangePushA(name);
  }
  void nvtxRangePop() override {
    ::nvtxRangePop();
  }

  // Invokes op(i) once for each visible CUDA device, with device i made
  // current for the duration of the call. OptionalCUDAGuard restores the
  // previously-current device when it goes out of scope.
  void onEachDevice(std::function<void(int)> op) override {
    at::cuda::OptionalCUDAGuard device_guard;
    int count;
    TORCH_CUDA_CHECK(cudaGetDeviceCount(&count));
    for (int i = 0; i < count; i++) {
      device_guard.set_index(i);
      op(i);
    }
  }

  // Blocks until all outstanding work on the current device finishes.
  void synchronize() override {
    // Check the return code like every other call in this struct: an
    // asynchronous kernel failure is reported exactly at this sync point,
    // and the original code silently discarded it.
    TORCH_CUDA_CHECK(cudaDeviceSynchronize());
  }

  // This translation unit is only linked into CUDA builds, so the CUDA
  // profiling path is unconditionally available.
  bool enabled() override {
    return true;
  }
};
struct RegisterCUDAMethods {
RegisterCUDAMethods() {
static CUDAMethods methods;
registerCUDAMethods(&methods);
}
};
RegisterCUDAMethods reg;
} // namespaces
} // namespace profiler
} // namespace autograd
} // namespace torch