File tree Expand file tree Collapse file tree 1 file changed +6
-6
lines changed
libcudacxx/include/cuda/__memcpy_async Expand file tree Collapse file tree 1 file changed +6
-6
lines changed Original file line number Diff line number Diff line change @@ -51,8 +51,8 @@ using namespace __v1;
5151
5252_CCCL_BEGIN_NAMESPACE_CUDA
5353
54- template <typename Group >
55- [[nodiscard]] _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int __thread_rank (const Group & __g)
54+ template <typename _Group >
55+ [[nodiscard]] _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int __thread_rank (const _Group & __g)
5656{
5757 return __g.thread_rank ();
5858}
@@ -63,10 +63,10 @@ __elect_from_group(const cooperative_groups::thread_block& __g) noexcept
6363{
6464 // Cannot call __g.thread_rank(), because we only forward declared the thread_block type
6565 // cooperative groups (and we here) maps a multidimensional thread id into the thread rank the same way as warps do
66- const unsigned int tid = threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x ;
67- const unsigned int warp_id = tid / 32 ;
68- const unsigned int uniform_warp_id = __shfl_sync (0xFFFFFFFF , warp_id , 0 ); // broadcast from lane 0
69- return uniform_warp_id == 0 && ::cuda::ptx::elect_sync (0xFFFFFFFF ); // elect a leader thread among warp 0
66+ const unsigned int __tid = threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x ;
67+ const unsigned int __warp_id = __tid / 32 ;
68+ const unsigned int __uniform_warp_id = __shfl_sync (0xFFFFFFFF , __warp_id , 0 ); // broadcast from lane 0
69+ return __uniform_warp_id == 0 && ::cuda::ptx::elect_sync (0xFFFFFFFF ); // elect a leader thread among warp 0
7070}
7171
7272// elect from a single warp
You can’t perform that action at this time.
0 commit comments