1919
2020#include < cuda/std/optional>
2121
22+ #if !_CCCL_COMPILER(NVRTC)
23+ # include < ostream>
24+ #endif
25+
2226CUB_NAMESPACE_BEGIN
2327
2428struct agent_reduce_policy // equivalent of AgentReducePolicy
@@ -226,17 +230,17 @@ struct sm100_tuning<double, OffsetT, op_type::plus, offset_size::_4, accum_size:
226230
// Coarse classification of an accumulator type. classify_accum_type() produces these
// values: one enumerator for `float`, one for `double`, and `other` for every other type.
// NOTE(review): the enumerator paired with `double` is spelled `double32`; if the numeric
// suffix is meant to be a bit width, `float64` may have been intended -- confirm before
// relying on the name.
227231enum class accum_type
228232{
229- _float ,
230- _double ,
233+ float32 ,
234+ double32 ,
231235 other,
232236};
233237
// Maps the accumulator type `AccumT` to an accum_type enumerator at compile time.
// The match is by exact type identity (is_same_v): `float` and `double` select their
// dedicated enumerators, and every other type yields accum_type::other.
// Takes no arguments; the result depends only on the template parameter.
234238template <typename AccumT>
235239_CCCL_HOST_DEVICE constexpr accum_type classify_accum_type ()
236240{
237- return ::cuda::std::is_same_v<AccumT, float > ? accum_type::_float
241+ return ::cuda::std::is_same_v<AccumT, float > ? accum_type::float32
238242 : ::cuda::std::is_same_v<AccumT, double >
239- ? accum_type::_double
243+ ? accum_type::double32
240244 : accum_type::other;
241245}
242246
@@ -252,11 +256,11 @@ _CCCL_API constexpr auto get_sm100_tuning(accum_type at, op_type ot, offset_size
252256{
253257 if (ot == op_type::plus)
254258 {
255- if (at == accum_type::_float && os == offset_size::_4 && as == accum_size::_4)
259+ if (at == accum_type::float32 && os == offset_size::_4 && as == accum_size::_4)
256260 {
257261 return sm100_tuning_values{16 , 512 , 2 };
258262 }
259- if (at == accum_type::_double && os == offset_size::_4 && as == accum_size::_8)
263+ if (at == accum_type::double32 && os == offset_size::_4 && as == accum_size::_8)
260264 {
261265 return sm100_tuning_values{16 , 640 , 1 };
262266 }
@@ -379,7 +383,7 @@ struct arch_policies // equivalent to the policy_hub, holds policies for a bunch
379383 // IDEA(bgruber): instead of the constexpr function, we could also provide a map<int, reduce_arch_policy> and move the
380384 // selection mechanism elsewhere
381385
382- _CCCL_API constexpr auto operator ()(int arch) const -> reduce_arch_policy
386+ [[nodiscard]] _CCCL_API constexpr auto operator ()(int arch) const -> reduce_arch_policy
383387 {
384388 if (arch >= 1000 )
385389 {
@@ -431,8 +435,7 @@ struct arch_policies // equivalent to the policy_hub, holds policies for a bunch
431435 constexpr int items_per_thread = 20 ;
432436 constexpr int items_per_vec_load = 4 ;
433437
434- auto [scaled_items,
435- scaled_threads] = scale_mem_bound (threads_per_block, items_per_thread, accum_size);
438+ auto [scaled_items, scaled_threads] = scale_mem_bound (threads_per_block, items_per_thread, accum_size);
436439 const auto rp =
437440 agent_reduce_policy{scaled_threads, scaled_items, items_per_vec_load, BLOCK_REDUCE_WARP_REDUCTIONS, LOAD_LDG};
438441
@@ -450,7 +453,7 @@ struct arch_policies // equivalent to the policy_hub, holds policies for a bunch
450453template <typename AccumT, typename OffsetT, typename ReductionOpT>
451454struct arch_policies_from_types
452455{
453- _CCCL_API constexpr auto operator ()(int arch) const -> reduce_arch_policy
456+ [[nodiscard]] _CCCL_API constexpr auto operator ()(int arch) const -> reduce_arch_policy
454457 {
455458 constexpr auto policies = arch_policies{
456459 classify_accum_type<AccumT>(), classify_op<ReductionOpT>(), classify_offset_size<OffsetT>(), int {sizeof (AccumT)}};
0 commit comments