OpenVDB  10.0.1
CudaDeviceBuffer.h
// Copyright Contributors to the OpenVDB Project
// SPDX-License-Identifier: MPL-2.0

/*!
 \file CudaDeviceBuffer.h

 \author Ken Museth

 \date January 8, 2020

 \brief Implements a simple CUDA allocator!

 CudaDeviceBuffer - a class for simple CUDA buffer allocation and management
*/

#ifndef NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
#define NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED

#include "HostBuffer.h" // for BufferTraits

#include <cuda_runtime_api.h> // for cudaMalloc/cudaMallocManaged/cudaFree

#if defined(DEBUG) || defined(_DEBUG)
static inline void gpuAssert(cudaError_t code, const char* file, int line, bool abort = true)
{
    if (code != cudaSuccess) {
        fprintf(stderr, "CUDA Runtime Error: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}
static inline void ptrAssert(void* ptr, const char* msg, const char* file, int line, bool abort = true)
{
    if (ptr == nullptr) {
        fprintf(stderr, "NULL pointer error: %s %s %d\n", msg, file, line);
        if (abort) exit(1);
    }
    if (uint64_t(ptr) % NANOVDB_DATA_ALIGNMENT) {
        fprintf(stderr, "Pointer misalignment error: %s %s %d\n", msg, file, line);
        if (abort) exit(1);
    }
}
#else
static inline void gpuAssert(cudaError_t, const char*, int, bool = true) {}
static inline void ptrAssert(void*, const char*, const char*, int, bool = true) {}
#endif

// Convenience macro for checking CUDA runtime API results;
// it can be wrapped around any runtime API call. No-op in release builds.
#define cudaCheck(ans) \
    { \
        gpuAssert((ans), __FILE__, __LINE__); \
    }

#define checkPtr(ptr, msg) \
    { \
        ptrAssert((ptr), (msg), __FILE__, __LINE__); \
    }

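// Example usage (illustrative sketch, not taken from NanoVDB; d_ptr is a placeholder name):
// both macros can wrap any CUDA runtime call or raw pointer, and the checks compile to
// no-ops in release builds.
//
//   float* d_ptr = nullptr;
//   cudaCheck(cudaMalloc((void**)&d_ptr, 1024 * sizeof(float))); // aborts in debug builds on failure
//   checkPtr(d_ptr, "cudaMalloc returned an invalid pointer");   // NULL/alignment check, debug builds only
//   cudaCheck(cudaFree(d_ptr));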
namespace nanovdb {

// ----------------------------> CudaDeviceBuffer <--------------------------------------

/// @brief Simple memory buffer using un-managed pinned host memory when compiled with NVCC.
/// Obviously this class is making explicit use of CUDA, so replace it with your own memory
/// allocator if you are not using CUDA.
/// @note While CUDA's pinned host memory allows for asynchronous memory copies between host and device,
/// it is significantly slower than cached (un-pinned) memory on the host.
/// (A usage sketch is provided at the end of this file.)
class CudaDeviceBuffer
{
    uint64_t mSize; // total number of bytes for the NanoVDB grid.
    uint8_t *mCpuData, *mGpuData; // raw buffers for the NanoVDB grid on the host and the device.

public:
    /// @brief Constructor that allocates @c size bytes of pinned host memory
    CudaDeviceBuffer(uint64_t size = 0)
        : mSize(0)
        , mCpuData(nullptr)
        , mGpuData(nullptr)
    {
        this->init(size);
    }
    /// @brief Disallow copy-construction
    CudaDeviceBuffer(const CudaDeviceBuffer&) = delete;
    /// @brief Move constructor
    CudaDeviceBuffer(CudaDeviceBuffer&& other) noexcept
        : mSize(other.mSize)
        , mCpuData(other.mCpuData)
        , mGpuData(other.mGpuData)
    {
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
    }
    /// @brief Disallow copy assignment operation
    CudaDeviceBuffer& operator=(const CudaDeviceBuffer&) = delete;
    /// @brief Move assignment operation
    CudaDeviceBuffer& operator=(CudaDeviceBuffer&& other) noexcept
    {
        clear();
        mSize = other.mSize;
        mCpuData = other.mCpuData;
        mGpuData = other.mGpuData;
        other.mSize = 0;
        other.mCpuData = nullptr;
        other.mGpuData = nullptr;
        return *this;
    }
    /// @brief Destructor frees memory on both the host and device
    ~CudaDeviceBuffer() { this->clear(); }

    /// @brief Initializes this buffer, i.e. allocates @c size bytes of pinned host memory
    void init(uint64_t size);

    /// @brief Returns a pointer to the raw host memory buffer managed by this allocator.
    ///
    /// @warning Note that the pointer can be NULL if the allocator was not initialized!
    uint8_t* data() const { return mCpuData; }

    /// @brief Returns a pointer to the raw device memory buffer, or NULL if no device allocation exists (see deviceUpload).
    uint8_t* deviceData() const { return mGpuData; }

    /// @brief Copy grid from the CPU/host to the GPU/device. If @c sync is false the memory copy is asynchronous!
    ///
    /// @note This will allocate memory on the GPU/device if it is not already allocated
    void deviceUpload(void* stream = 0, bool sync = true) const;

    /// @brief Copy grid from the GPU/device to the CPU/host. If @c sync is false the memory copy is asynchronous!
    void deviceDownload(void* stream = 0, bool sync = true) const;

    /// @brief Returns the size in bytes of the raw memory buffer managed by this allocator.
    uint64_t size() const { return mSize; }

    /// @brief Returns true if this allocator is empty, i.e. has no allocated memory
    bool empty() const { return mSize == 0; }

    /// @brief De-allocate all memory managed by this allocator and set all pointers to NULL
    void clear();

    /// @brief Static factory method that returns an instance of this buffer
    static CudaDeviceBuffer create(uint64_t size, const CudaDeviceBuffer* context = nullptr);

}; // CudaDeviceBuffer class

template<>
struct BufferTraits<CudaDeviceBuffer>
{
    static const bool hasDeviceDual = true;
};

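// Illustrative sketch (assumes a hypothetical helper, uploadIfDeviceDual, and <type_traits>;
// not part of the NanoVDB API): the BufferTraits specialization above lets templated host
// code detect at compile time that this buffer type maintains both a host and a device
// allocation, e.g.
//
//   template<typename BufferT>
//   typename std::enable_if<BufferTraits<BufferT>::hasDeviceDual, void>::type
//   uploadIfDeviceDual(const BufferT& buffer, void* stream = 0)
//   {
//       buffer.deviceUpload(stream, /*sync=*/true); // only instantiated for device-dual buffers
//   }
//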
// --------------------------> Implementations below <------------------------------------

inline CudaDeviceBuffer CudaDeviceBuffer::create(uint64_t size, const CudaDeviceBuffer*)
{
    return CudaDeviceBuffer(size);
}

inline void CudaDeviceBuffer::init(uint64_t size)
{
    if (size == mSize)
        return;
    if (mSize > 0)
        this->clear();
    if (size == 0)
        return;
    mSize = size;
    cudaCheck(cudaMallocHost((void**)&mCpuData, size)); // un-managed pinned memory on the host (can be slow to access!). Always 32B aligned
    checkPtr(mCpuData, "failed to allocate host data");
} // CudaDeviceBuffer::init

inline void CudaDeviceBuffer::deviceUpload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    if (mGpuData == nullptr)
        cudaCheck(cudaMalloc((void**)&mGpuData, mSize)); // un-managed memory on the device, always 32B aligned!
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mGpuData, mCpuData, mSize, cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceUpload

inline void CudaDeviceBuffer::deviceDownload(void* stream, bool sync) const
{
    checkPtr(mCpuData, "uninitialized cpu data");
    checkPtr(mGpuData, "uninitialized gpu data");
    cudaCheck(cudaMemcpyAsync(mCpuData, mGpuData, mSize, cudaMemcpyDeviceToHost, reinterpret_cast<cudaStream_t>(stream)));
    if (sync)
        cudaCheck(cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(stream)));
} // CudaDeviceBuffer::deviceDownload

inline void CudaDeviceBuffer::clear()
{
    if (mGpuData)
        cudaCheck(cudaFree(mGpuData));
    if (mCpuData)
        cudaCheck(cudaFreeHost(mCpuData));
    mCpuData = mGpuData = nullptr;
    mSize = 0;
} // CudaDeviceBuffer::clear

} // namespace nanovdb

#endif // end of NANOVDB_CUDA_DEVICE_BUFFER_H_HAS_BEEN_INCLUDED
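A minimal usage sketch (illustrative only; the kernel, launch configuration, and buffer size are placeholders, not part of NanoVDB): allocate pinned host memory, fill it through data(), upload it, process it on the device through deviceData(), and copy the result back.

#include "CudaDeviceBuffer.h"

// hypothetical kernel that touches every byte of the buffer
__global__ void myKernel(uint8_t* d_data, uint64_t numBytes)
{
    const uint64_t i = uint64_t(blockIdx.x) * blockDim.x + threadIdx.x;
    if (i < numBytes) d_data[i] = 0; // placeholder device work
}

void example()
{
    nanovdb::CudaDeviceBuffer buffer = nanovdb::CudaDeviceBuffer::create(1 << 20); // 1 MB of pinned host memory
    // ... fill buffer.data() on the host, e.g. with a serialized NanoVDB grid ...
    cudaStream_t stream;
    cudaCheck(cudaStreamCreate(&stream));
    buffer.deviceUpload(stream, false); // asynchronous host -> device copy, enqueued on the stream
    myKernel<<<4096, 256, 0, stream>>>(buffer.deviceData(), buffer.size());
    buffer.deviceDownload(stream, true); // device -> host copy, then synchronize the stream
    cudaCheck(cudaStreamDestroy(stream));
} // the destructor frees both the host and the device allocation

When @c sync is false the copy is only enqueued, so the host must synchronize the stream before reading data() again; here the final deviceDownload(stream, true) performs that synchronization.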