forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
THCCachingHostAllocator.h
33 lines (26 loc) · 1.19 KB
/
THCCachingHostAllocator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#ifndef THC_CACHING_HOST_ALLOCATOR_INC
#define THC_CACHING_HOST_ALLOCATOR_INC
#include <THC/THCGeneral.h>
#include <c10/cuda/CUDAStream.h>
//
// A caching allocator for CUDA host allocations (pinned memory).
//
// This provides a drop-in replacement for THCudaHostAllocator that re-uses
// freed pinned (page-locked) memory allocations. Re-using them avoids the
// device synchronizations caused by cudaFreeHost calls.
//
// To ensure correct behavior, THCCachingHostAllocator_recordEvent must be
// called anytime a pointer from this allocator is used in a cudaMemcpyAsync
// call between host and device. We implement this for storages and tensors in
// copy_from_cpu_async_ and copy_to_cpu_async_.
//
// Note that this allocator does not split larger allocations into smaller
// blocks, unlike the caching device allocator.
//
// Returns the caching pinned-memory host allocator described in the comment
// above, exposed through the generic c10::Allocator interface.
// NOTE(review): this header does not show who owns the returned pointer or
// whether the same instance is returned on every call -- confirm against the
// implementation before caching or freeing it.
THC_API c10::Allocator* getTHCCachingHostAllocator(void);
// Records an event in the specified stream. The allocation 'ptr' will not be
// re-used until the event has occurred.
//
//   ptr    - a pointer obtained from this allocator that is being used in an
//            asynchronous host<->device copy.
//   stream - the CUDA stream in which the event is recorded.
//
// Returns a cudaError_t status code.
// NOTE(review): presumably cudaSuccess on success and the failing CUDA call's
// error otherwise; the behavior for a 'ptr' that did not come from this
// allocator is not visible from this header -- confirm against the
// implementation.
THC_API cudaError_t THCCachingHostAllocator_recordEvent(void *ptr, at::cuda::CUDAStream stream);
// Releases cached pinned memory allocations via cudaFreeHost.
// NOTE(review): whether allocations that are still waiting on a recorded
// event are released as well is not visible from this header -- confirm
// against the implementation.
THC_API void THCCachingHostAllocator_emptyCache(void);
#endif