Skip to content

Commit 68b855e

Browse files
committed
Refactor driver APIs
1 parent f9835a5 commit 68b855e

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+654
-668
lines changed

cudax/include/cuda/experimental/__container/async_buffer.cuh

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -177,8 +177,8 @@ private:
177177
static_assert(::cuda::std::contiguous_iterator<_Iter>, "Non contiguous iterators are not supported");
178178
// TODO use batched memcpy for non-contiguous iterators, it allows to
179179
// specify stream ordered access
180-
::cuda::__driver::__memcpyAsync(
181-
__dest, ::cuda::std::to_address(__first), sizeof(_Tp) * __count, __buf_.stream().get());
180+
_CCCL_TRY_DRIVER_API(
181+
__memcpyAsync(__dest, ::cuda::std::to_address(__first), sizeof(_Tp) * __count, __buf_.stream().get()));
182182
}
183183

184184
public:
@@ -635,11 +635,11 @@ template <typename _BufferTo, typename _BufferFrom>
635635
void __copy_cross_buffers(stream_ref __stream, _BufferTo& __to, const _BufferFrom& __from)
636636
{
637637
__stream.wait(__from.stream());
638-
::cuda::__driver::__memcpyAsync(
638+
_CCCL_TRY_DRIVER_API(__memcpyAsync(
639639
__to.__unwrapped_begin(),
640640
__from.__unwrapped_begin(),
641641
sizeof(typename _BufferTo::value_type) * __from.size(),
642-
__stream.get());
642+
__stream.get()));
643643
}
644644

645645
_CCCL_BEGIN_NAMESPACE_ARCH_DEPENDENT
@@ -660,16 +660,9 @@ __fill_n(cuda::stream_ref __stream, _Tp* __first, ::cuda::std::size_t __count, c
660660
// we need to check the attributes
661661
if constexpr (_Accessability == __memory_accessability::__device_and_host)
662662
{
663-
__driver::__pointer_attribute_value_type_t<CU_POINTER_ATTRIBUTE_MEMORY_TYPE> __type;
664-
bool __is_managed{};
665-
auto __status1 = ::cuda::__driver::__pointerGetAttributeNoThrow<CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(__type, __first);
666-
auto __status2 =
667-
::cuda::__driver::__pointerGetAttributeNoThrow<CU_POINTER_ATTRIBUTE_IS_MANAGED>(__is_managed, __first);
668-
if (__status1 != ::cudaSuccess || __status2 != ::cudaSuccess)
669-
{
670-
__throw_cuda_error(__status1, "Failed to get buffer memory attributes");
671-
}
672-
if (__type == ::CU_MEMORYTYPE_HOST && !__is_managed)
663+
const auto __mem_type = _CCCL_TRY_DRIVER_API(__pointerGetAttribute<::CU_POINTER_ATTRIBUTE_MEMORY_TYPE>(__first));
664+
const auto __is_managed = _CCCL_TRY_DRIVER_API(__pointerGetAttribute<::CU_POINTER_ATTRIBUTE_IS_MANAGED>(__first));
665+
if (__mem_type == ::CU_MEMORYTYPE_HOST && !__is_managed)
673666
{
674667
__fill_n<_Tp, __memory_accessability::__host>(__stream, __first, __count, __value);
675668
}
@@ -687,7 +680,7 @@ __fill_n(cuda::stream_ref __stream, _Tp* __first, ::cuda::std::size_t __count, c
687680
{
688681
if constexpr (sizeof(_Tp) <= 4)
689682
{
690-
::cuda::__driver::__memsetAsync(__first, __value, __count, __stream.get());
683+
_CCCL_TRY_DRIVER_API(__memsetAsync(__first, __value, __count, __stream.get()));
691684
}
692685
else
693686
{

cudax/include/cuda/experimental/__cufile/exception.cuh

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -102,18 +102,18 @@ public:
102102
}
103103

104104
//! @brief Macro to call a cuFile API and throw a cufile_error or cuda_error if it fails.
105-
#define _CCCL_TRY_CUFILE_API(_NAME, _MSG, ...) \
106-
do \
107-
{ \
108-
const ::CUfileError_t __cufile_error_status = _NAME(__VA_ARGS__); \
109-
switch (__cufile_error_status.err) \
110-
{ \
111-
case ::CU_FILE_SUCCESS: \
112-
break; \
113-
case ::CU_FILE_CUDA_DRIVER_ERROR: \
114-
::cuda::__throw_cuda_error(static_cast<::cudaError_t>(__cufile_error_status.cu_err), _MSG, #_NAME); \
115-
default: \
116-
__throw_cufile_error(__cufile_error_status.err, _MSG, #_NAME); \
117-
} \
105+
#define _CCCL_TRY_CUFILE_API(_NAME, _MSG, ...) \
106+
do \
107+
{ \
108+
const ::CUfileError_t __cufile_error_status = _NAME(__VA_ARGS__); \
109+
switch (__cufile_error_status.err) \
110+
{ \
111+
case ::CU_FILE_SUCCESS: \
112+
break; \
113+
case ::CU_FILE_CUDA_DRIVER_ERROR: \
114+
::cuda::__throw_cuda_error(__cufile_error_status.cu_err, _MSG, #_NAME); \
115+
default: \
116+
__throw_cufile_error(__cufile_error_status.err, _MSG, #_NAME); \
117+
} \
118118
} while (0)
119119
} // namespace cuda::experimental

cudax/include/cuda/experimental/__green_context/green_ctx.cuh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,9 @@ struct green_context
4747
: __dev_id(__device.get())
4848
{
4949
// TODO get CUdevice from device
50-
auto __dev_handle = ::cuda::__driver::__deviceGet(__dev_id);
51-
__green_ctx = ::cuda::__driver::__greenCtxCreate(__dev_handle);
52-
__transformed = ::cuda::__driver::__ctxFromGreenCtx(__green_ctx);
50+
auto __dev_handle = _CCCL_TRY_DRIVER_API(__deviceGet(__dev_id));
51+
__green_ctx = _CCCL_TRY_DRIVER_API(__greenCtxCreate(__dev_handle));
52+
__transformed = _CCCL_TRY_DRIVER_API(__ctxFromGreenCtx(__green_ctx));
5353
}
5454

5555
green_context(const green_context&) = delete;
@@ -58,17 +58,17 @@ struct green_context
5858
// TODO this probably should be the runtime equivalent once available
5959
[[nodiscard]] static green_context from_native_handle(CUgreenCtx __gctx)
6060
{
61-
CUcontext __transformed = ::cuda::__driver::__ctxFromGreenCtx(__gctx);
62-
::cuda::__driver::__ctxPush(__transformed);
63-
CUdevice __device = ::cuda::__driver::__ctxGetDevice();
64-
::cuda::__driver::__ctxPop();
61+
CUcontext __transformed = _CCCL_TRY_DRIVER_API(__ctxFromGreenCtx(__gctx));
62+
(void) _CCCL_TRY_DRIVER_API(__ctxPush(__transformed));
63+
CUdevice __device = _CCCL_TRY_DRIVER_API(__ctxGetDevice());
64+
(void) _CCCL_TRY_DRIVER_API(__ctxPop());
6565
return green_context(::cuda::__driver::__cudevice_to_ordinal(__device), __gctx, __transformed);
6666
}
6767

6868
# if _CCCL_CTK_AT_LEAST(13, 0)
6969
[[nodiscard]] _CCCL_HOST_API green_context_id id() const
7070
{
71-
return green_context_id{::cuda::__driver::__greenCtxGetId(__green_ctx)};
71+
return green_context_id{_CCCL_TRY_DRIVER_API(__greenCtxGetId(__green_ctx))};
7272
}
7373
# endif // _CCCL_CTK_AT_LEAST(13, 0)
7474

@@ -83,7 +83,7 @@ struct green_context
8383
{
8484
if (__green_ctx)
8585
{
86-
[[maybe_unused]] cudaError_t __status = ::cuda::__driver::__greenCtxDestroyNoThrow(__green_ctx);
86+
_CCCL_ASSERT_DRIVER_API(__greenCtxDestroy(__green_ctx));
8787
}
8888
}
8989

cudax/include/cuda/experimental/__kernel/attributes.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ struct __kernel_attr_impl
4747
template <class _Signature>
4848
[[nodiscard]] type operator()(kernel_ref<_Signature> __kernel, device_ref __dev) const
4949
{
50-
return static_cast<type>(
51-
::cuda::__driver::__kernelGetAttribute(_Attr, __kernel.get(), ::cuda::__driver::__deviceGet(__dev.get())));
50+
return static_cast<type>(_CCCL_TRY_DRIVER_API(
51+
__kernelGetAttribute(_Attr, __kernel.get(), _CCCL_TRY_DRIVER_API(__deviceGet(__dev.get())))));
5252
}
5353
};
5454

cudax/include/cuda/experimental/__kernel/kernel_ref.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ public:
9595
//! @throws cuda_error if the kernel name cannot be obtained
9696
[[nodiscard]] ::cuda::std::string_view name() const
9797
{
98-
return ::cuda::__driver::__kernelGetName(__kernel_);
98+
return _CCCL_TRY_DRIVER_API(__kernelGetName(__kernel_));
9999
}
100100
#endif // _CCCL_CTK_AT_LEAST(12, 3)
101101

cudax/include/cuda/experimental/__launch/configuration.cuh

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -325,33 +325,34 @@ template <class _Tp>
325325
[[nodiscard]] ::cudaError_t __apply_launch_option(
326326
const dynamic_shared_memory<_Tp>& __opt, ::CUlaunchConfig& __config, ::CUfunction __kernel) noexcept
327327
{
328-
::cudaError_t __status = ::cudaSuccess;
328+
// Since CUDA 12.4, querying CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES requires the function to be loaded.
329+
const auto __needs_load = ::cuda::__driver::__version_at_least(12, 4);
330+
if (__needs_load.__error_ != ::CUDA_SUCCESS)
331+
{
332+
return static_cast<::cudaError_t>(__needs_load.__error_);
333+
}
329334

330-
// Since CUDA 12.4, querying CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES requires the
331-
// function to be loaded.
332-
if (::cuda::__driver::__version_at_least(12, 4))
335+
if (__needs_load.__value_)
333336
{
334-
__status = ::cuda::__driver::__functionLoadNoThrow(__kernel);
335-
if (__status != ::cudaSuccess)
337+
const auto __status = ::cuda::__driver::__functionLoad(__kernel).__error_;
338+
if (__status != ::CUDA_SUCCESS)
336339
{
337-
return __status;
340+
return static_cast<::cudaError_t>(__status);
338341
}
339342
}
340343

341-
int __static_smem_size{};
342-
__status = ::cuda::__driver::__functionGetAttributeNoThrow(
343-
__static_smem_size, ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, __kernel);
344-
if (__status != ::cudaSuccess)
344+
const auto __static_smem_size =
345+
::cuda::__driver::__functionGetAttribute(::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, __kernel);
346+
if (__static_smem_size.__error_ != ::CUDA_SUCCESS)
345347
{
346-
return __status;
348+
return static_cast<::cudaError_t>(__static_smem_size.__error_);
347349
}
348350

349-
int __max_dyn_smem_size{};
350-
__status = ::cuda::__driver::__functionGetAttributeNoThrow(
351-
__max_dyn_smem_size, ::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, __kernel);
352-
if (__status != ::cudaSuccess)
351+
const auto __max_dyn_smem_size =
352+
::cuda::__driver::__functionGetAttribute(::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, __kernel);
353+
if (__max_dyn_smem_size.__error_ != ::CUDA_SUCCESS)
353354
{
354-
return __status;
355+
return static_cast<::cudaError_t>(__max_dyn_smem_size.__error_);
355356
}
356357

357358
const auto __dyn_smem_size = ::cuda::overflow_cast<int>(__opt.size_bytes());
@@ -360,19 +361,21 @@ template <class _Tp>
360361
return ::cudaErrorInvalidValue;
361362
}
362363

363-
const int __smem_size = __static_smem_size + __dyn_smem_size.value;
364+
const int __smem_size = __static_smem_size.__value_ + __dyn_smem_size.value;
364365
if (static_cast<::cuda::std::size_t>(__smem_size) > __max_portable_dyn_smem_size && !__opt.__non_portable_)
365366
{
366367
return ::cudaErrorInvalidValue;
367368
}
368369

369-
if (__max_dyn_smem_size < __dyn_smem_size.value)
370+
if (__max_dyn_smem_size.__value_ < __dyn_smem_size.value)
370371
{
371-
__status = ::cuda::__driver::__functionSetAttributeNoThrow(
372-
__kernel, ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, __dyn_smem_size.value);
373-
if (__status != ::cudaSuccess)
372+
const auto __status =
373+
::cuda::__driver::__functionSetAttribute(
374+
__kernel, ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, __dyn_smem_size.value)
375+
.__error_;
376+
if (__status != ::CUDA_SUCCESS)
374377
{
375-
return __status;
378+
return static_cast<::cudaError_t>(__status);
376379
}
377380
}
378381

cudax/include/cuda/experimental/__launch/launch.cuh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ __global__ static void __kernel_launcher_no_config(_Kernel __kernel_fn, _Args...
6060
template <class... _Args>
6161
[[nodiscard]] _CCCL_HOST_API CUfunction __get_cufunction_of(kernel_ref<void(_Args...)> __kernel)
6262
{
63-
return ::cuda::__driver::__kernelGetFunction(__kernel.get());
63+
return _CCCL_TRY_DRIVER_API(__kernelGetFunction(__kernel.get()));
6464
}
6565

6666
template <class... _Args>
@@ -90,12 +90,12 @@ __do_launch(_GraphInserter&& __inserter, ::CUlaunchConfig& __config, ::CUfunctio
9090

9191
auto __dependencies = __inserter.get_dependencies();
9292

93-
const auto __node = ::cuda::__driver::__graphAddKernelNode(
94-
__inserter.get_graph().get(), __dependencies.data(), __dependencies.size(), __node_params);
93+
const auto __node = _CCCL_TRY_DRIVER_API(
94+
__graphAddKernelNode(__inserter.get_graph().get(), __dependencies.data(), __dependencies.size(), __node_params));
9595

9696
for (unsigned int __i = 0; __i < __config.numAttrs; ++__i)
9797
{
98-
::cuda::__driver::__graphKernelNodeSetAttribute(__node, __config.attrs[__i].id, __config.attrs[__i].value);
98+
_CCCL_TRY_DRIVER_API(__graphKernelNodeSetAttribute(__node, __config.attrs[__i].id, __config.attrs[__i].value));
9999
}
100100

101101
// TODO skip the update if called on rvalue?
@@ -111,7 +111,7 @@ _CCCL_HOST_API void inline __do_launch(
111111
#if defined(_CUDAX_LAUNCH_CONFIG_TEST)
112112
test_launch_kernel_replacement(__config, __kernel, __args_ptrs);
113113
#else // ^^^ _CUDAX_LAUNCH_CONFIG_TEST ^^^ / vvv !_CUDAX_LAUNCH_CONFIG_TEST vvv
114-
::cuda::__driver::__launchKernel(__config, __kernel, __args_ptrs);
114+
_CCCL_TRY_DRIVER_API(__launchKernel(__config, __kernel, __args_ptrs));
115115
#endif // ^^^ !_CUDAX_LAUNCH_CONFIG_TEST ^^^
116116
}
117117

cudax/include/cuda/experimental/__library/library.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ struct library : public library_ref
7979
{
8080
if (__library_ != value_type{})
8181
{
82-
[[maybe_unused]] const auto __status = ::cuda::__driver::__libraryUnloadNoThrow(__library_);
82+
_CCCL_ASSERT_DRIVER_API(__libraryUnload(__library_));
8383
}
8484
}
8585

cudax/include/cuda/experimental/__library/library_ref.cuh

Lines changed: 20 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,15 @@ public:
7272
//! @throws cuda_error if the library could not be queried for the kernel
7373
[[nodiscard]] bool has_kernel(const char* __name) const
7474
{
75-
::CUkernel __kernel{};
76-
switch (const auto __res = ::cuda::__driver::__libraryGetKernelNoThrow(__kernel, __library_, __name))
75+
const auto __status = ::cuda::__driver::__libraryGetKernel(__library_, __name).__error_;
76+
switch (__status)
7777
{
78-
case ::cudaSuccess:
78+
case ::CUDA_SUCCESS:
7979
return true;
80-
case ::cudaErrorSymbolNotFound:
80+
case ::CUDA_ERROR_NOT_FOUND:
8181
return false;
8282
default:
83-
::cuda::__throw_cuda_error(__res, "Failed to get the kernel from library");
83+
::cuda::__throw_cuda_error(__status, "Failed to get the kernel from library");
8484
}
8585
}
8686

@@ -96,13 +96,7 @@ public:
9696
template <class _Signature>
9797
[[nodiscard]] kernel_ref<_Signature> kernel(const char* __name) const
9898
{
99-
::CUkernel __kernel{};
100-
if (const auto __res = ::cuda::__driver::__libraryGetKernelNoThrow(__kernel, __library_, __name);
101-
__res != ::cudaSuccess)
102-
{
103-
::cuda::__throw_cuda_error(__res, "Failed to get the kernel from the library");
104-
}
105-
return kernel_ref<_Signature>{__kernel};
99+
return kernel_ref<_Signature>{_CCCL_TRY_DRIVER_API(__libraryGetKernel(__library_, __name))};
106100
}
107101

108102
//! @brief Checks if the library contains a global symbol with the given name on a device
@@ -117,16 +111,15 @@ public:
117111
{
118112
::cuda::__ensure_current_context __ctx_guard(__device);
119113

120-
::CUdeviceptr __dptr{};
121-
::cuda::std::size_t __size{};
122-
switch (const auto __res = ::cuda::__driver::__libraryGetGlobalNoThrow(__dptr, __size, __library_, __name))
114+
const auto __status = ::cuda::__driver::__libraryGetGlobal(__library_, __name).__error_;
115+
switch (__status)
123116
{
124-
case ::cudaSuccess:
117+
case ::CUDA_SUCCESS:
125118
return true;
126-
case ::cudaErrorSymbolNotFound:
119+
case ::CUDA_ERROR_NOT_FOUND:
127120
return false;
128121
default:
129-
::cuda::__throw_cuda_error(__res, "Failed to get the global symbol from library");
122+
::cuda::__throw_cuda_error(__status, "Failed to get the global symbol from library");
130123
}
131124
}
132125

@@ -141,15 +134,8 @@ public:
141134
[[nodiscard]] library_symbol_info global(const char* __name, ::cuda::device_ref __device) const
142135
{
143136
::cuda::__ensure_current_context __ctx_guard(__device);
144-
145-
::CUdeviceptr __dptr{};
146-
::cuda::std::size_t __size{};
147-
if (const auto __res = ::cuda::__driver::__libraryGetGlobalNoThrow(__dptr, __size, __library_, __name);
148-
__res != ::cudaSuccess)
149-
{
150-
::cuda::__throw_cuda_error(__res, "Failed to get the global symbol from the library");
151-
}
152-
return library_symbol_info{reinterpret_cast<void*>(__dptr), __size};
137+
auto [__ptr, __size] = _CCCL_TRY_DRIVER_API(__libraryGetGlobal(__library_, __name));
138+
return library_symbol_info{__ptr, __size};
153139
}
154140

155141
//! @brief Checks if the library contains a managed symbol with the given name
@@ -163,16 +149,15 @@ public:
163149
//! @note Managed memory is shared across devices
164150
[[nodiscard]] bool has_managed(const char* __name) const
165151
{
166-
::CUdeviceptr __dptr{};
167-
::cuda::std::size_t __size{};
168-
switch (const auto __res = ::cuda::__driver::__libraryGetManagedNoThrow(__dptr, __size, __library_, __name))
152+
const auto __status = ::cuda::__driver::__libraryGetManaged(__library_, __name).__error_;
153+
switch (__status)
169154
{
170-
case ::cudaSuccess:
155+
case ::CUDA_SUCCESS:
171156
return true;
172-
case ::cudaErrorSymbolNotFound:
157+
case ::CUDA_ERROR_NOT_FOUND:
173158
return false;
174159
default:
175-
::cuda::__throw_cuda_error(__res, "Failed to get the managed symbol from library");
160+
::cuda::__throw_cuda_error(__status, "Failed to get the managed symbol from library");
176161
}
177162
}
178163

@@ -187,14 +172,8 @@ public:
187172
//! @note Managed memory is shared across devices
188173
[[nodiscard]] library_symbol_info managed(const char* __name) const
189174
{
190-
::CUdeviceptr __dptr{};
191-
::cuda::std::size_t __size{};
192-
if (const auto __res = ::cuda::__driver::__libraryGetManagedNoThrow(__dptr, __size, __library_, __name);
193-
__res != ::cudaSuccess)
194-
{
195-
::cuda::__throw_cuda_error(__res, "Failed to get the managed symbol from the library");
196-
}
197-
return library_symbol_info{reinterpret_cast<void*>(__dptr), __size};
175+
auto [__ptr, __size] = _CCCL_TRY_DRIVER_API(__libraryGetManaged(__library_, __name));
176+
return library_symbol_info{__ptr, __size};
198177
}
199178

200179
//! @brief Gets the CUlibrary handle

cudax/include/cuda/experimental/__memory_resource/synchronous_resource_adapter.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ struct synchronous_resource_adapter : ::cuda::mr::__copy_default_queries<_Resour
8585
}
8686
else
8787
{
88-
::cuda::__driver::__streamSynchronizeNoThrow(__stream.get());
88+
_CCCL_ASSERT_DRIVER_API(__streamSynchronize(__stream.get()));
8989
__resource.deallocate_sync(__ptr, __bytes, __alignment);
9090
}
9191
}

0 commit comments

Comments
 (0)