RISA
Memory.h
Go to the documentation of this file.
1 /*
2  * This file is part of the GLADOS-library.
3  *
4  * Copyright (C) 2016 Helmholtz-Zentrum Dresden-Rossendorf
5  *
6  * GLADOS is free software: You can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * GLADOS is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with GLADOS. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Date: 30 November 2016
20  * Authors: Tobias Frust <t.frust@hzdr.de>
21  *
22  */
23 
24 #ifndef CUDA_MEMORY_H_
25 #define CUDA_MEMORY_H_
26 
27 #include <cstddef>
28 #include <memory>
29 #include <tuple>
30 #include <type_traits>
31 #include <utility>
32 
33 #include <glados/Memory.h>
34 
35 #include "Check.h"
36 
37 namespace glados
38 {
39  namespace cuda
40  {
41  namespace detail
42  {
43  // no CHECK() here as deleters are required to never throw exceptions
44  struct device_deleter { auto operator()(void* p) -> void { cudaFree(p); }};
45  struct host_deleter { auto operator()(void* p) -> void { cudaFreeHost(p); }};
46 
47  enum class Target { Device, Host };
48 
49  template <Target Src, Target Dest> struct Direction {};
50  template <> struct Direction<Target::Host, Target::Device> { static constexpr auto value = cudaMemcpyHostToDevice; };
51  template <> struct Direction<Target::Device, Target::Host> { static constexpr auto value = cudaMemcpyDeviceToHost; };
52  template <> struct Direction<Target::Device, Target::Device> { static constexpr auto value = cudaMemcpyDeviceToDevice; };
53  template <> struct Direction<Target::Host, Target::Host> { static constexpr auto value = cudaMemcpyHostToHost; };
54 
55  template <class T, class Deleter, Target t>
56  class unique_ptr
57  {
58  public:
59  using pointer = T*;
60  using element_type = T;
61  using deleter_type = Deleter;
62  static constexpr auto target = Target{t};
63 
64  public:
65  constexpr unique_ptr() noexcept
66  : ptr_{}
67  {}
68 
69  constexpr unique_ptr(std::nullptr_t) noexcept
70  : ptr_{nullptr}
71  {}
72 
73  unique_ptr(pointer p) noexcept
74  : ptr_{p}
75  {}
76 
77  unique_ptr(unique_ptr&& other) noexcept
78  : ptr_{std::move(other.ptr_)}
79  {}
80 
81  inline auto operator=(unique_ptr&& r) noexcept -> unique_ptr&
82  {
83  ptr_ = std::move(r.ptr_);
84  return *this;
85  }
86 
87  inline auto operator=(std::nullptr_t) noexcept -> unique_ptr&
88  {
89  ptr_ = nullptr;
90  return *this;
91  }
92 
93  inline auto release() noexcept -> pointer { return ptr_.release(); }
94  inline auto reset(pointer ptr = pointer()) noexcept -> void { ptr_.reset(ptr); }
95 
96  template <class U>
97  auto reset(U) noexcept -> void = delete;
98 
99  inline auto reset(std::nullptr_t) noexcept -> void { ptr_.reset(); }
100  inline auto swap(unique_ptr& other) noexcept -> void { ptr_.swap(other.ptr_); }
101  inline auto get() const noexcept -> pointer { return ptr_.get(); }
102  inline auto get_deleter() noexcept -> Deleter& { return ptr_.get_deleter(); }
103  inline auto get_deleter() const noexcept -> const Deleter& { return ptr_.get_deleter(); }
104  explicit inline operator bool() const noexcept { return bool(ptr_); }
105  inline auto operator[](std::size_t i) const -> T& { return ptr_[i]; }
106 
107  private:
108  unique_ptr(const unique_ptr&) = delete;
109  auto operator=(const unique_ptr&) -> unique_ptr& = delete;
110 
111  private:
112  std::unique_ptr<T[], Deleter> ptr_;
113  };
116 
117  template <class T1, class D1, Target t1, class T2, class D2, Target t2>
118  inline auto operator==(const unique_ptr<T1, D1, t2>& x, const unique_ptr<T2, D2, t2>& y) noexcept -> bool
119  {
120  return x.get() == y.get();
121  }
122 
123  template <class T1, class D1, Target t1, class T2, class D2, Target t2>
124  inline auto operator!=(const unique_ptr<T1, D1, t2>& x, const unique_ptr<T2, D2, t2>& y) noexcept -> bool
125  {
126  return x.get() != y.get();
127  }
128 
129  template <class T, class D, Target t>
130  inline auto operator==(const unique_ptr<T, D, t>& x, std::nullptr_t) noexcept -> bool
131  {
132  return !x;
133  }
134 
135  template <class T, class D, Target t>
136  inline auto operator==(std::nullptr_t, const unique_ptr<T, D, t>&x) noexcept -> bool
137  {
138  return !x;
139  }
140 
141  template <class T, class D, Target t>
142  inline auto operator!=(const unique_ptr<T, D, t>& x, std::nullptr_t) noexcept -> bool
143  {
144  return static_cast<bool>(x);
145  }
146 
147  template <class T, class D, Target t>
148  inline auto operator!=(std::nullptr_t, const unique_ptr<T, D, t>&x) noexcept -> bool
149  {
150  return static_cast<bool>(x);
151  }
152  }
153 
155  {
156  protected:
157  ~sync_copy_policy() = default;
158 
159  /* 1D copies*/
160  template <class Dest, class Src>
161  inline auto copy(Dest& dest, const Src& src, std::size_t size) const -> void
162  {
163  CHECK(cudaMemcpy(dest.get(), src.get(), size,
165  }
166 
167  /* 2D copies */
168  template <class Dest, class Src>
169  inline auto copy(Dest& dest, const Src& src, std::size_t width, std::size_t height) const -> void
170  {
171  CHECK(cudaMemcpy2D(dest.get(), dest.pitch(),
172  src.get(), src.pitch(),
173  width * sizeof(typename Src::element_type), height,
175  }
176 
177  /* 3D copies */
178  template <class Dest, class Src>
179  inline auto copy(Dest& dest, const Src& src, std::size_t width, std::size_t height, std::size_t depth) const -> void
180  {
181  auto parms = cudaMemcpy3DParms{0};
182  auto uchar_width = width * sizeof(typename Src::element_type)/sizeof(unsigned char);
183  // using uchar_width instead of width because cudaMemcpy3D interprets the pointer's elements as unsigned char
184  parms.srcPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(src.get()), src.pitch(), uchar_width, height);
185  parms.dstPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(dest.get()), dest.pitch(), uchar_width, height);
186  parms.extent = make_cudaExtent(uchar_width, height, depth);
188 
189  CHECK(cudaMemcpy3D(&parms));
190  }
191  };
192 
194  {
195  public:
196  inline auto set_stream(cudaStream_t s) noexcept -> void
197  {
198  stream_ = s;
199  }
200 
201  inline auto stream() const noexcept -> cudaStream_t
202  {
203  return stream_;
204  }
205 
206  protected:
208  : stream_{nullptr}
209  {}
210 
211  ~async_copy_policy() = default;
212 
213  /* 1D copies */
214  template <class Dest, class Src>
215  inline auto copy(Dest& dest, const Src& src, std::size_t size) const -> void
216  {
217  CHECK(cudaMemcpyAsync(dest.get(), src.get(), size,
219  }
220 
221  /* 2D copies */
222  template <class Dest, class Src>
223  inline auto copy(Dest& dest, const Src& src, std::size_t width, std::size_t height) const -> void
224  {
225  CHECK(cudaMemcpy2DAsync(dest.get(), dest.pitch(),
226  src.get(), src.pitch(),
227  width * sizeof(typename Src::element_type), height,
229  }
230 
231  /* 3D copies */
232  template <class Dest, class Src>
233  inline auto copy(Dest& dest, const Src& src, std::size_t width, std::size_t height, std::size_t depth) const -> void
234  {
235  auto parms = cudaMemcpy3DParms{0};
236  auto uchar_width = width * sizeof(typename Src::element_type)/sizeof(unsigned char);
237  parms.srcPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(src.get()), src.pitch(), uchar_width, height);
238  parms.dstPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(dest.get()), dest.pitch(), uchar_width, height);
239  parms.extent = make_cudaExtent(uchar_width, height, depth);
241 
242  CHECK(cudaMemcpy3DAsync(&parms, stream_));
243  }
244 
245  private:
246  cudaStream_t stream_;
247  };
248 
249  template <class T, class CopyPolicy> using device_ptr = glados::ptr<T, CopyPolicy, detail::unique_device_ptr<T>>;
250  template <class T, class CopyPolicy> using host_ptr = glados::ptr<T, CopyPolicy, detail::unique_host_ptr<T>>;
251 
252  template <class T, class CopyPolicy, class is3D> using pitched_device_ptr = glados::pitched_ptr<T, CopyPolicy, is3D, detail::unique_device_ptr<T>>;
253  template <class T, class CopyPolicy, class is3D> using pitched_host_ptr = glados::pitched_ptr<T, CopyPolicy, is3D, detail::unique_host_ptr<T>>;
254 
255  /*
256  * Array types with unknown bounds
257  */
258  template <class T, class CopyPolicy = sync_copy_policy>
259  auto make_device_ptr(std::size_t length) -> device_ptr<T, CopyPolicy>
260  {
261  auto size = length * sizeof(T);
262  auto p = static_cast<T*>(nullptr);
263  CHECK(cudaMalloc(&p, size));
265  }
266 
267  template <class T, class CopyPolicy = sync_copy_policy>
268  auto make_host_ptr(std::size_t length) -> host_ptr<T, CopyPolicy>
269  {
270  auto p = static_cast<T*>(nullptr);
271  auto size = length * sizeof(T);
272  CHECK(cudaMallocHost(&p, size));
274  }
275 
276  template <class T, class CopyPolicy = sync_copy_policy>
277  auto make_device_ptr(std::size_t width, std::size_t height) -> pitched_device_ptr<T, CopyPolicy, std::false_type>
278  {
279  auto p = static_cast<T*>(nullptr);
280  auto pitch = std::size_t{};
281  CHECK(cudaMallocPitch(&p, &pitch, width * sizeof(T), height));
283  }
284 
285  template <class T, class CopyPolicy = sync_copy_policy>
286  auto make_host_ptr(std::size_t width, std::size_t height) -> pitched_host_ptr<T, CopyPolicy, std::false_type>
287  {
288  auto p = static_cast<T*>(nullptr);
289  auto pitch = width * sizeof(T);
290  CHECK(cudaMallocHost(&p, pitch * height));
292  }
293 
294  template <class T, class CopyPolicy = sync_copy_policy>
295  auto make_device_ptr(std::size_t width, std::size_t height, std::size_t depth) -> pitched_device_ptr<T, CopyPolicy, std::true_type>
296  {
297  auto extent = make_cudaExtent(width * sizeof(T), height, depth);
298  auto pitchedPtr = cudaPitchedPtr{};
299  CHECK(cudaMalloc3D(&pitchedPtr, extent));
300  // omitting pitchedPtr.xsize and pitchedPtr.ysize as those are identical to width and height
302  pitchedPtr.pitch, width, height, depth);
303  }
304 
305  template <class T, class CopyPolicy = sync_copy_policy>
306  auto make_host_ptr(std::size_t width, std::size_t height, std::size_t depth) -> pitched_host_ptr<T, CopyPolicy, std::true_type>
307  {
308  auto p = static_cast<T*>(nullptr);
309  auto pitch = width * sizeof(T);
310  CHECK(cudaMallocHost(&p, pitch * height * depth));
311  return pitched_host_ptr<T, CopyPolicy, std::true_type>(detail::unique_host_ptr<T>(p), pitch, width, height, depth);
312  }
313 
314  namespace detail
315  {
316  /* Functor for explicit copies */
317  template <class CopyPolicy>
318  struct copy_ftor : public CopyPolicy
319  {
320  /* 1D */
321  template <class Dest, class Src>
322  inline auto operator()(Dest& dest, const Src& src)
323  -> typename std::enable_if<(!Dest::has_pitch && !Src::has_pitch), void>::type
324  {
325  CopyPolicy::copy(dest, src, src.size());
326  }
327 
328  /* 2D */
329  template <class Dest, class Src>
330  inline auto operator()(Dest& dest, const Src& src)
331  -> typename std::enable_if<(Dest::has_pitch && Src::has_pitch) && (!Dest::is3DPtr && !Src::is3DPtr), void>::type
332  {
333  CopyPolicy::copy(dest, src, src.width(), src.height());
334  }
335 
336  /* 3D */
337  template <class Dest, class Src>
338  inline auto operator()(Dest& dest, const Src& src)
339  -> typename std::enable_if<(Dest::has_pitch && Src::has_pitch) && (Dest::is3DPtr && Src::is3DPtr), void>::type
340  {
341  CopyPolicy::copy(dest, src, src.width(), src.height(), src.depth());
342  }
343  };
344  }
345 
346  /*
347  * Explicit synchronous copy
348  */
349  template <class Dest, class Src>
350  auto copy_sync(Dest& dest, const Src& src) -> void
351  {
353  ftor(dest, src);
354  }
355 
356  /*
357  * Explicit asynchronous copy
358  */
359  template <class Dest, class Src>
360  auto copy_async(Dest& dest, const Src& src) -> void
361  {
363  ftor(dest, src);
364  }
365  }
366 }
367 
368 #endif /* CUDA_MEMORY_H_ */
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height) const -> void
Definition: Memory.h:169
auto operator()(void *p) -> void
Definition: Memory.h:44
auto get_deleter() const noexcept-> const Deleter &
Definition: Memory.h:103
auto release() noexcept-> pointer
Definition: Memory.h:93
auto set_stream(cudaStream_t s) noexcept-> void
Definition: Memory.h:196
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height, std::size_t depth) const -> void
Definition: Memory.h:233
auto reset(pointer ptr=pointer()) noexcept-> void
Definition: Memory.h:94
auto operator()(Dest &dest, const Src &src) -> typename std::enable_if<(Dest::has_pitch &&Src::has_pitch)&&(Dest::is3DPtr &&Src::is3DPtr), void >::type
Definition: Memory.h:338
auto operator()(Dest &dest, const Src &src) -> typename std::enable_if<(!Dest::has_pitch &&!Src::has_pitch), void >::type
Definition: Memory.h:322
auto operator=(std::nullptr_t) noexcept-> unique_ptr &
Definition: Memory.h:87
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height, std::size_t depth) const -> void
Definition: Memory.h:179
#define CHECK(x)
Definition: Check.h:35
unique_ptr(unique_ptr &&other) noexcept
Definition: Memory.h:77
unique_ptr(pointer p) noexcept
Definition: Memory.h:73
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height) const -> void
Definition: Memory.h:223
auto make_device_ptr(std::size_t length) -> device_ptr< T, CopyPolicy >
Definition: Memory.h:259
auto operator[](std::size_t i) const -> T &
Definition: Memory.h:105
auto operator!=(const unique_ptr< T1, D1, t2 > &x, const unique_ptr< T2, D2, t2 > &y) noexcept-> bool
Definition: Memory.h:124
auto copy_sync(Dest &dest, const Src &src) -> void
Definition: Memory.h:350
constexpr unique_ptr() noexcept
Definition: Memory.h:65
auto operator=(unique_ptr &&r) noexcept-> unique_ptr &
Definition: Memory.h:81
auto make_host_ptr(std::size_t length) -> host_ptr< T, CopyPolicy >
Definition: Memory.h:268
auto copy(Dest &dest, const Src &src, std::size_t size) const -> void
Definition: Memory.h:215
auto operator()(void *p) -> void
Definition: Memory.h:45
auto copy_async(Dest &dest, const Src &src) -> void
Definition: Memory.h:360
auto operator()(Dest &dest, const Src &src) -> typename std::enable_if<(Dest::has_pitch &&Src::has_pitch)&&(!Dest::is3DPtr &&!Src::is3DPtr), void >::type
Definition: Memory.h:330
auto swap(unique_ptr &other) noexcept-> void
Definition: Memory.h:100
auto get_deleter() noexcept-> Deleter &
Definition: Memory.h:102
auto copy(Dest &dest, const Src &src, std::size_t size) const -> void
Definition: Memory.h:161
auto operator==(const unique_ptr< T1, D1, t2 > &x, const unique_ptr< T2, D2, t2 > &y) noexcept-> bool
Definition: Memory.h:118
auto stream() const noexcept-> cudaStream_t
Definition: Memory.h:201
constexpr unique_ptr(std::nullptr_t) noexcept
Definition: Memory.h:69
std::unique_ptr< T[], Deleter > ptr_
Definition: Memory.h:112
auto reset(std::nullptr_t) noexcept-> void
Definition: Memory.h:99