@@ -128,15 +128,15 @@ template <typename ArchPolicies,
128128 typename OffsetT,
129129 typename ReductionOpT,
130130 typename AccumT,
131- typename TransformOpT,
132- int BlockThreads = ArchPolicies{}(::cuda::arch_id{CUB_PTX_ARCH / 10 }).reduce_policy.block_threads>
133- CUB_DETAIL_KERNEL_ATTRIBUTES __launch_bounds__ (BlockThreads) void DeviceReduceKernel(
134- InputIteratorT d_in,
135- AccumT* d_out,
136- OffsetT num_items,
137- GridEvenShare<OffsetT> even_share,
138- ReductionOpT reduction_op,
139- TransformOpT transform_op)
131+ typename TransformOpT>
132+ CUB_DETAIL_KERNEL_ATTRIBUTES __launch_bounds__ ( int (
133+ ArchPolicies{}(::cuda::arch_id{CUB_PTX_ARCH / 10 })
134+ .reduce_policy.block_threads)) void DeviceReduceKernel( InputIteratorT d_in,
135+ AccumT* d_out,
136+ OffsetT num_items,
137+ GridEvenShare<OffsetT> even_share,
138+ ReductionOpT reduction_op,
139+ TransformOpT transform_op)
140140{
141141 static constexpr agent_reduce_policy policy = ArchPolicies{}(::cuda::arch_id{CUB_PTX_ARCH / 10 }).reduce_policy ;
142142 // TODO(bgruber): pass policy directly as template argument to AgentReduce in C++20
@@ -218,15 +218,15 @@ template <typename ArchPolicies,
218218 typename ReductionOpT,
219219 typename InitT,
220220 typename AccumT,
221- typename TransformOpT = ::cuda::std::identity,
222- int BlockThreads = ArchPolicies{}(::cuda::arch_id{CUB_PTX_ARCH / 10 }).single_tile_policy.block_threads>
223- CUB_DETAIL_KERNEL_ATTRIBUTES __launch_bounds__ (BlockThreads, 1 ) void DeviceReduceSingleTileKernel(
224- InputIteratorT d_in,
225- OutputIteratorT d_out,
226- OffsetT num_items,
227- ReductionOpT reduction_op,
228- InitT init,
229- TransformOpT transform_op)
221+ typename TransformOpT = ::cuda::std::identity>
222+ CUB_DETAIL_KERNEL_ATTRIBUTES __launch_bounds__ (
223+ int (ArchPolicies{}(::cuda::arch_id{CUB_PTX_ARCH / 10 }).single_tile_policy.block_threads),
224+ 1) void DeviceReduceSingleTileKernel( InputIteratorT d_in,
225+ OutputIteratorT d_out,
226+ OffsetT num_items,
227+ ReductionOpT reduction_op,
228+ InitT init,
229+ TransformOpT transform_op)
230230{
231231 static constexpr agent_reduce_policy policy = ArchPolicies{}(::cuda::arch_id{CUB_PTX_ARCH / 10 }).single_tile_policy ;
232232 // TODO(bgruber): pass policy directly as template argument to AgentReduce in C++20
0 commit comments