Skip to content

Commit 8bd8708

Browse files
Not ugly enough
1 parent c05fa96 commit 8bd8708

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

libcudacxx/include/cuda/__memcpy_async/cp_async_bulk_shared_global.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ using namespace __v1;
5151

5252
_CCCL_BEGIN_NAMESPACE_CUDA
5353

54-
template <typename Group>
55-
[[nodiscard]] _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int __thread_rank(const Group& __g)
54+
template <typename _Group>
55+
[[nodiscard]] _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int __thread_rank(const _Group& __g)
5656
{
5757
return __g.thread_rank();
5858
}
@@ -63,10 +63,10 @@ __elect_from_group(const cooperative_groups::thread_block& __g) noexcept
6363
{
6464
// Cannot call __g.thread_rank(), because we only forward declared the thread_block type
6565
// cooperative groups (and this code) map a multidimensional thread id to the thread rank the same way warps do
66-
const unsigned int tid = threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
67-
const unsigned int warp_id = tid / 32;
68-
const unsigned int uniform_warp_id = __shfl_sync(0xFFFFFFFF, warp_id, 0); // broadcast from lane 0
69-
return uniform_warp_id == 0 && ::cuda::ptx::elect_sync(0xFFFFFFFF); // elect a leader thread among warp 0
66+
const unsigned int __tid = threadIdx.z * blockDim.y * blockDim.x + threadIdx.y * blockDim.x + threadIdx.x;
67+
const unsigned int __warp_id = __tid / 32;
68+
const unsigned int __uniform_warp_id = __shfl_sync(0xFFFFFFFF, __warp_id, 0); // broadcast from lane 0
69+
return __uniform_warp_id == 0 && ::cuda::ptx::elect_sync(0xFFFFFFFF); // elect a leader thread among warp 0
7070
}
7171

7272
// elect from a single warp

0 commit comments

Comments
 (0)