24 #ifndef CUDA_MEMORY_H_ 25 #define CUDA_MEMORY_H_ 30 #include <type_traits> 49 template <Target Src, Target Dest>
struct Direction {};
55 template <
class T,
class Deleter, Target t>
62 static constexpr
auto target =
Target{t};
78 : ptr_{std::move(other.ptr_)}
83 ptr_ = std::move(r.ptr_);
97 auto reset(U) noexcept ->
void =
delete;
99 inline auto reset(std::nullptr_t) noexcept ->
void { ptr_.reset(); }
101 inline auto get()
const noexcept ->
pointer {
return ptr_.get(); }
102 inline auto get_deleter() noexcept -> Deleter& {
return ptr_.get_deleter(); }
103 inline auto get_deleter() const noexcept -> const Deleter& {
return ptr_.get_deleter(); }
104 explicit inline operator bool() const noexcept {
return bool(ptr_); }
105 inline auto operator[](std::size_t i)
const -> T& {
return ptr_[i]; }
112 std::unique_ptr<T[], Deleter>
ptr_;
117 template <
class T1,
class D1, Target t1,
class T2,
class D2, Target t2>
120 return x.get() == y.get();
123 template <
class T1,
class D1, Target t1,
class T2,
class D2, Target t2>
126 return x.get() != y.get();
129 template <
class T,
class D, Target t>
135 template <
class T,
class D, Target t>
141 template <
class T,
class D, Target t>
144 return static_cast<bool>(x);
147 template <
class T,
class D, Target t>
150 return static_cast<bool>(x);
160 template <
class Dest,
class Src>
161 inline auto copy(Dest& dest,
const Src& src, std::size_t size)
const ->
void 163 CHECK(cudaMemcpy(dest.get(), src.get(), size,
168 template <
class Dest,
class Src>
169 inline auto copy(Dest& dest,
const Src& src, std::size_t width, std::size_t height)
const ->
void 171 CHECK(cudaMemcpy2D(dest.get(), dest.pitch(),
172 src.get(), src.pitch(),
173 width *
sizeof(
typename Src::element_type), height,
178 template <
class Dest,
class Src>
179 inline auto copy(Dest& dest,
const Src& src, std::size_t width, std::size_t height, std::size_t depth)
const ->
void 181 auto parms = cudaMemcpy3DParms{0};
182 auto uchar_width = width *
sizeof(
typename Src::element_type)/
sizeof(
unsigned char);
184 parms.srcPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(src.get()), src.pitch(), uchar_width, height);
185 parms.dstPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(dest.get()), dest.pitch(), uchar_width, height);
186 parms.extent = make_cudaExtent(uchar_width, height, depth);
189 CHECK(cudaMemcpy3D(&parms));
201 inline auto stream() const noexcept -> cudaStream_t
214 template <
class Dest,
class Src>
215 inline auto copy(Dest& dest,
const Src& src, std::size_t size)
const ->
void 217 CHECK(cudaMemcpyAsync(dest.get(), src.get(), size,
222 template <
class Dest,
class Src>
223 inline auto copy(Dest& dest,
const Src& src, std::size_t width, std::size_t height)
const ->
void 225 CHECK(cudaMemcpy2DAsync(dest.get(), dest.pitch(),
226 src.get(), src.pitch(),
227 width *
sizeof(
typename Src::element_type), height,
232 template <
class Dest,
class Src>
233 inline auto copy(Dest& dest,
const Src& src, std::size_t width, std::size_t height, std::size_t depth)
const ->
void 235 auto parms = cudaMemcpy3DParms{0};
236 auto uchar_width = width *
sizeof(
typename Src::element_type)/
sizeof(
unsigned char);
237 parms.srcPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(src.get()), src.pitch(), uchar_width, height);
238 parms.dstPtr = make_cudaPitchedPtr(reinterpret_cast<unsigned char*>(dest.get()), dest.pitch(), uchar_width, height);
239 parms.extent = make_cudaExtent(uchar_width, height, depth);
242 CHECK(cudaMemcpy3DAsync(&parms, stream_));
258 template <
class T,
class CopyPolicy = sync_copy_policy>
261 auto size = length *
sizeof(T);
262 auto p =
static_cast<T*
>(
nullptr);
263 CHECK(cudaMalloc(&p, size));
267 template <
class T,
class CopyPolicy = sync_copy_policy>
270 auto p =
static_cast<T*
>(
nullptr);
271 auto size = length *
sizeof(T);
272 CHECK(cudaMallocHost(&p, size));
276 template <
class T,
class CopyPolicy = sync_copy_policy>
279 auto p =
static_cast<T*
>(
nullptr);
280 auto pitch = std::size_t{};
281 CHECK(cudaMallocPitch(&p, &pitch, width *
sizeof(T), height));
285 template <
class T,
class CopyPolicy = sync_copy_policy>
288 auto p =
static_cast<T*
>(
nullptr);
289 auto pitch = width *
sizeof(T);
290 CHECK(cudaMallocHost(&p, pitch * height));
294 template <
class T,
class CopyPolicy = sync_copy_policy>
297 auto extent = make_cudaExtent(width *
sizeof(T), height, depth);
298 auto pitchedPtr = cudaPitchedPtr{};
299 CHECK(cudaMalloc3D(&pitchedPtr, extent));
302 pitchedPtr.pitch, width, height, depth);
305 template <
class T,
class CopyPolicy = sync_copy_policy>
308 auto p =
static_cast<T*
>(
nullptr);
309 auto pitch = width *
sizeof(T);
310 CHECK(cudaMallocHost(&p, pitch * height * depth));
317 template <
class CopyPolicy>
321 template <
class Dest,
class Src>
323 ->
typename std::enable_if<(!Dest::has_pitch && !Src::has_pitch), void>::type
325 CopyPolicy::copy(dest, src, src.size());
329 template <
class Dest,
class Src>
331 ->
typename std::enable_if<(Dest::has_pitch && Src::has_pitch) && (!Dest::is3DPtr && !Src::is3DPtr), void>::type
333 CopyPolicy::copy(dest, src, src.width(), src.height());
337 template <
class Dest,
class Src>
339 ->
typename std::enable_if<(Dest::has_pitch && Src::has_pitch) && (Dest::is3DPtr && Src::is3DPtr), void>::type
341 CopyPolicy::copy(dest, src, src.width(), src.height(), src.depth());
349 template <
class Dest,
class Src>
359 template <
class Dest,
class Src>
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height) const -> void
auto operator()(void *p) -> void
auto get_deleter() const noexcept-> const Deleter &
auto release() noexcept-> pointer
auto set_stream(cudaStream_t s) noexcept-> void
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height, std::size_t depth) const -> void
auto reset(pointer ptr=pointer()) noexcept-> void
auto operator()(Dest &dest, const Src &src) -> typename std::enable_if<(Dest::has_pitch &&Src::has_pitch)&&(Dest::is3DPtr &&Src::is3DPtr), void >::type
auto operator()(Dest &dest, const Src &src) -> typename std::enable_if<(!Dest::has_pitch &&!Src::has_pitch), void >::type
auto operator=(std::nullptr_t) noexcept-> unique_ptr &
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height, std::size_t depth) const -> void
unique_ptr(unique_ptr &&other) noexcept
unique_ptr(pointer p) noexcept
auto copy(Dest &dest, const Src &src, std::size_t width, std::size_t height) const -> void
auto make_device_ptr(std::size_t length) -> device_ptr< T, CopyPolicy >
auto operator[](std::size_t i) const -> T &
auto operator!=(const unique_ptr< T1, D1, t2 > &x, const unique_ptr< T2, D2, t2 > &y) noexcept-> bool
auto copy_sync(Dest &dest, const Src &src) -> void
constexpr unique_ptr() noexcept
auto operator=(unique_ptr &&r) noexcept-> unique_ptr &
auto make_host_ptr(std::size_t length) -> host_ptr< T, CopyPolicy >
auto copy(Dest &dest, const Src &src, std::size_t size) const -> void
auto operator()(void *p) -> void
auto copy_async(Dest &dest, const Src &src) -> void
auto operator()(Dest &dest, const Src &src) -> typename std::enable_if<(Dest::has_pitch &&Src::has_pitch)&&(!Dest::is3DPtr &&!Src::is3DPtr), void >::type
auto swap(unique_ptr &other) noexcept-> void
auto get_deleter() noexcept-> Deleter &
auto copy(Dest &dest, const Src &src, std::size_t size) const -> void
auto operator==(const unique_ptr< T1, D1, t2 > &x, const unique_ptr< T2, D2, t2 > &y) noexcept-> bool
auto stream() const noexcept-> cudaStream_t
constexpr unique_ptr(std::nullptr_t) noexcept
std::unique_ptr< T[], Deleter > ptr_
auto reset(std::nullptr_t) noexcept-> void