Skip to content

Commit 99976dc

Browse files
authored
MVP for disabling nvtx ranges for thrust::seq (#6415)
1 parent 60be6c0 commit 99976dc

File tree

7 files changed

+84
-38
lines changed

7 files changed

+84
-38
lines changed

thrust/thrust/detail/copy.inl

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# pragma system_header
2727
#endif // no system header
2828
#include <thrust/detail/copy.h>
29+
#include <thrust/detail/nvtx_policy.h>
2930
#include <thrust/system/detail/generic/select_system.h>
3031

3132
// Include all active backend system implementations (generic, sequential, host and device)
@@ -52,7 +53,7 @@ copy(const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
5253
InputIterator last,
5354
OutputIterator result)
5455
{
55-
_CCCL_NVTX_RANGE_SCOPE("thrust::copy");
56+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::copy");
5657
using thrust::system::detail::generic::copy;
5758
return copy(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, result);
5859
} // end copy()
@@ -62,7 +63,7 @@ template <typename DerivedPolicy, typename InputIterator, typename Size, typenam
6263
_CCCL_HOST_DEVICE OutputIterator copy_n(
6364
const thrust::detail::execution_policy_base<DerivedPolicy>& exec, InputIterator first, Size n, OutputIterator result)
6465
{
65-
_CCCL_NVTX_RANGE_SCOPE("thrust::copy_n");
66+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::copy_n");
6667
using thrust::system::detail::generic::copy_n;
6768
return copy_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, result);
6869
} // end copy_n()
@@ -78,7 +79,8 @@ _CCCL_HOST_DEVICE OutputIterator two_system_copy(
7879
InputIterator last,
7980
OutputIterator result)
8081
{
81-
_CCCL_NVTX_RANGE_SCOPE("thrust::two_system_copy");
82+
_CCCL_NVTX_RANGE_SCOPE_IF(should_enable_nvtx_for_policy<System1>() || should_enable_nvtx_for_policy<System2>(),
83+
"thrust::two_system_copy");
8284
using thrust::system::detail::generic::select_system;
8385

8486
return thrust::copy(
@@ -98,7 +100,8 @@ _CCCL_HOST_DEVICE OutputIterator two_system_copy_n(
98100
Size n,
99101
OutputIterator result)
100102
{
101-
_CCCL_NVTX_RANGE_SCOPE("thrust::two_system_copy_n");
103+
_CCCL_NVTX_RANGE_SCOPE_IF(should_enable_nvtx_for_policy<System1>() || should_enable_nvtx_for_policy<System2>(),
104+
"thrust::two_system_copy_n");
102105
using thrust::system::detail::generic::select_system;
103106

104107
return thrust::copy_n(
@@ -113,9 +116,11 @@ _CCCL_HOST_DEVICE OutputIterator two_system_copy_n(
113116
template <typename InputIterator, typename OutputIterator>
114117
OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result)
115118
{
116-
_CCCL_NVTX_RANGE_SCOPE("thrust::copy");
117119
using System1 = typename thrust::iterator_system<InputIterator>::type;
118120
using System2 = typename thrust::iterator_system<OutputIterator>::type;
121+
_CCCL_NVTX_RANGE_SCOPE_IF(
122+
detail::should_enable_nvtx_for_policy<System1>() || detail::should_enable_nvtx_for_policy<System2>(),
123+
"thrust::copy");
119124

120125
System1 system1;
121126
System2 system2;
@@ -126,9 +131,11 @@ OutputIterator copy(InputIterator first, InputIterator last, OutputIterator resu
126131
template <typename InputIterator, typename Size, typename OutputIterator>
127132
OutputIterator copy_n(InputIterator first, Size n, OutputIterator result)
128133
{
129-
_CCCL_NVTX_RANGE_SCOPE("thrust::copy_n");
130134
using System1 = typename thrust::iterator_system<InputIterator>::type;
131135
using System2 = typename thrust::iterator_system<OutputIterator>::type;
136+
_CCCL_NVTX_RANGE_SCOPE_IF(
137+
detail::should_enable_nvtx_for_policy<System1>() || detail::should_enable_nvtx_for_policy<System2>(),
138+
"thrust::copy_n");
132139

133140
System1 system1;
134141
System2 system2;

thrust/thrust/detail/fill.inl

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# pragma system_header
2727
#endif // no system header
2828

29+
#include <thrust/detail/nvtx_policy.h>
2930
#include <thrust/fill.h>
3031
#include <thrust/iterator/iterator_traits.h>
3132
#include <thrust/system/detail/generic/select_system.h>
@@ -54,7 +55,7 @@ fill(const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
5455
ForwardIterator last,
5556
const T& value)
5657
{
57-
_CCCL_NVTX_RANGE_SCOPE("thrust::fill");
58+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::fill");
5859
using thrust::system::detail::generic::fill;
5960
return fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, value);
6061
} // end fill()
@@ -64,18 +65,17 @@ template <typename DerivedPolicy, typename OutputIterator, typename Size, typena
6465
_CCCL_HOST_DEVICE OutputIterator
6566
fill_n(const thrust::detail::execution_policy_base<DerivedPolicy>& exec, OutputIterator first, Size n, const T& value)
6667
{
67-
_CCCL_NVTX_RANGE_SCOPE("thrust::fill_n");
68+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::fill_n");
6869
using thrust::system::detail::generic::fill_n;
6970
return fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, value);
7071
} // end fill_n()
7172

7273
template <typename ForwardIterator, typename T>
7374
_CCCL_HOST_DEVICE void fill(ForwardIterator first, ForwardIterator last, const T& value)
7475
{
75-
_CCCL_NVTX_RANGE_SCOPE("thrust::fill");
76-
using thrust::system::detail::generic::select_system;
77-
7876
using System = typename thrust::iterator_system<ForwardIterator>::type;
77+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "thrust::fill");
78+
using thrust::system::detail::generic::select_system;
7979

8080
System system;
8181

@@ -85,10 +85,9 @@ _CCCL_HOST_DEVICE void fill(ForwardIterator first, ForwardIterator last, const T
8585
template <typename OutputIterator, typename Size, typename T>
8686
_CCCL_HOST_DEVICE OutputIterator fill_n(OutputIterator first, Size n, const T& value)
8787
{
88-
_CCCL_NVTX_RANGE_SCOPE("thrust::fill_n");
89-
using thrust::system::detail::generic::select_system;
90-
9188
using System = typename thrust::iterator_system<OutputIterator>::type;
89+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "thrust::fill_n");
90+
using thrust::system::detail::generic::select_system;
9291

9392
System system;
9493

thrust/thrust/detail/for_each.inl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
2626
# pragma system_header
2727
#endif // no system header
28+
#include <thrust/detail/nvtx_policy.h>
2829
#include <thrust/for_each.h>
2930
#include <thrust/iterator/iterator_traits.h>
3031
#include <thrust/system/detail/generic/select_system.h>
@@ -53,7 +54,7 @@ _CCCL_HOST_DEVICE InputIterator for_each(
5354
InputIterator last,
5455
UnaryFunction f)
5556
{
56-
_CCCL_NVTX_RANGE_SCOPE("thrust::for_each");
57+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::for_each");
5758
using thrust::system::detail::generic::for_each;
5859

5960
return for_each(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, f);
@@ -62,9 +63,9 @@ _CCCL_HOST_DEVICE InputIterator for_each(
6263
template <typename InputIterator, typename UnaryFunction>
6364
InputIterator for_each(InputIterator first, InputIterator last, UnaryFunction f)
6465
{
65-
_CCCL_NVTX_RANGE_SCOPE("thrust::for_each");
66-
using thrust::system::detail::generic::select_system;
6766
using System = typename thrust::iterator_system<InputIterator>::type;
67+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "thrust::for_each");
68+
using thrust::system::detail::generic::select_system;
6869

6970
System system;
7071
return thrust::for_each(select_system(system), first, last, f);
@@ -75,7 +76,7 @@ template <typename DerivedPolicy, typename InputIterator, typename Size, typenam
7576
_CCCL_HOST_DEVICE InputIterator for_each_n(
7677
const thrust::detail::execution_policy_base<DerivedPolicy>& exec, InputIterator first, Size n, UnaryFunction f)
7778
{
78-
_CCCL_NVTX_RANGE_SCOPE("thrust::for_each_n");
79+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::for_each_n");
7980
using thrust::system::detail::generic::for_each_n;
8081

8182
return for_each_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, f);
@@ -84,10 +85,9 @@ _CCCL_HOST_DEVICE InputIterator for_each_n(
8485
template <typename InputIterator, typename Size, typename UnaryFunction>
8586
InputIterator for_each_n(InputIterator first, Size n, UnaryFunction f)
8687
{
87-
_CCCL_NVTX_RANGE_SCOPE("thrust::for_each_n");
88-
using thrust::system::detail::generic::select_system;
89-
9088
using System = typename thrust::iterator_system<InputIterator>::type;
89+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "thrust::for_each_n");
90+
using thrust::system::detail::generic::select_system;
9191

9292
System system;
9393
return thrust::for_each_n(select_system(system), first, n, f);

thrust/thrust/detail/generate.inl

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# pragma system_header
2727
#endif // no system header
2828

29+
#include <thrust/detail/nvtx_policy.h>
2930
#include <thrust/generate.h>
3031
#include <thrust/iterator/iterator_traits.h>
3132
#include <thrust/system/detail/generic/select_system.h>
@@ -54,7 +55,7 @@ generate(const thrust::detail::execution_policy_base<DerivedPolicy>& exec,
5455
ForwardIterator last,
5556
Generator gen)
5657
{
57-
_CCCL_NVTX_RANGE_SCOPE("thrust::generate");
58+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::generate");
5859
using thrust::system::detail::generic::generate;
5960
return generate(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, gen);
6061
} // end generate()
@@ -64,18 +65,17 @@ template <typename DerivedPolicy, typename OutputIterator, typename Size, typena
6465
_CCCL_HOST_DEVICE OutputIterator generate_n(
6566
const thrust::detail::execution_policy_base<DerivedPolicy>& exec, OutputIterator first, Size n, Generator gen)
6667
{
67-
_CCCL_NVTX_RANGE_SCOPE("thrust::generate_n");
68+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::generate_n");
6869
using thrust::system::detail::generic::generate_n;
6970
return generate_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, gen);
7071
} // end generate_n()
7172

7273
template <typename ForwardIterator, typename Generator>
7374
void generate(ForwardIterator first, ForwardIterator last, Generator gen)
7475
{
75-
_CCCL_NVTX_RANGE_SCOPE("thrust::generate");
76-
using thrust::system::detail::generic::select_system;
77-
7876
using System = typename thrust::iterator_system<ForwardIterator>::type;
77+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "thrust::generate");
78+
using thrust::system::detail::generic::select_system;
7979

8080
System system;
8181

@@ -85,10 +85,9 @@ void generate(ForwardIterator first, ForwardIterator last, Generator gen)
8585
template <typename OutputIterator, typename Size, typename Generator>
8686
OutputIterator generate_n(OutputIterator first, Size n, Generator gen)
8787
{
88-
_CCCL_NVTX_RANGE_SCOPE("thrust::generate_n");
89-
using thrust::system::detail::generic::select_system;
90-
9188
using System = typename thrust::iterator_system<OutputIterator>::type;
89+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "thrust::generate_n");
90+
using thrust::system::detail::generic::select_system;
9291

9392
System system;
9493

thrust/thrust/detail/nvtx_policy.h

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
#pragma once
4+
5+
#include <thrust/detail/config.h>
6+
7+
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
8+
# pragma GCC system_header
9+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
10+
# pragma clang system_header
11+
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
12+
# pragma system_header
13+
#endif // no system header
14+
15+
#include <cuda/std/__type_traits/decay.h>
16+
#include <cuda/std/__type_traits/is_base_of.h>
17+
18+
THRUST_NAMESPACE_BEGIN
19+
20+
// Forward declarations
21+
namespace system::detail::sequential
22+
{
23+
template <class>
24+
struct execution_policy;
25+
} // namespace system::detail::sequential
26+
27+
namespace detail
28+
{
29+
// Compile-time predicate that decides whether an NVTX range should be opened for a given execution policy.
30+
// Returns false (NVTX DISABLED) when the decayed policy type derives from sequential::execution_policy<Policy>,
// which is intended to cover thrust::seq and any policy built on the sequential system; returns true otherwise.
31+
// ENABLED for all other policies (CUDA, OMP, TBB, etc.)
// NOTE(review): confirm the OMP/TBB claim above - if those policies inherit from the cpp/sequential policy,
// the is_base_of test below would disable NVTX for them as well.
32+
template <typename DerivedPolicy>
33+
inline constexpr bool should_enable_nvtx_for_policy()
34+
{
35+
// Strip references and cv-qualifiers so the check operates on the bare policy type.
using Policy = ::cuda::std::decay_t<DerivedPolicy>;
36+
// This catches thrust::seq, cpp::tag, and any other sequential-based policy.
// The candidate base is instantiated with Policy itself, so only policies that appear as the template
// argument of their own sequential::execution_policy base are matched.
37+
return !::cuda::std::is_base_of_v<thrust::system::detail::sequential::execution_policy<Policy>, Policy>;
38+
}
39+
} // namespace detail
40+
41+
THRUST_NAMESPACE_END

thrust/thrust/detail/reduce.inl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# pragma system_header
2727
#endif // no system header
2828

29+
#include <thrust/detail/nvtx_policy.h>
2930
#include <thrust/iterator/iterator_traits.h>
3031
#include <thrust/reduce.h>
3132
#include <thrust/system/detail/generic/select_system.h>
@@ -59,7 +60,7 @@ template <typename DerivedPolicy, typename InputIterator>
5960
_CCCL_HOST_DEVICE detail::it_value_t<InputIterator>
6061
reduce(const thrust::detail::execution_policy_base<DerivedPolicy>& exec, InputIterator first, InputIterator last)
6162
{
62-
_CCCL_NVTX_RANGE_SCOPE("thrust::reduce");
63+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "thrust::reduce");
6364
using thrust::system::detail::generic::reduce;
6465
return reduce(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last);
6566
} // end reduce()

thrust/thrust/detail/uninitialized_fill.inl

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
# pragma system_header
2727
#endif // no system header
2828

29+
#include <thrust/detail/nvtx_policy.h>
2930
#include <thrust/iterator/iterator_traits.h>
3031
#include <thrust/system/detail/generic/select_system.h>
3132
#include <thrust/uninitialized_fill.h>
@@ -54,7 +55,7 @@ _CCCL_HOST_DEVICE void uninitialized_fill(
5455
ForwardIterator last,
5556
const T& x)
5657
{
57-
_CCCL_NVTX_RANGE_SCOPE("uninitialized_fill");
58+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "uninitialized_fill");
5859
using thrust::system::detail::generic::uninitialized_fill;
5960
return uninitialized_fill(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, last, x);
6061
} // end uninitialized_fill()
@@ -64,18 +65,17 @@ template <typename DerivedPolicy, typename ForwardIterator, typename Size, typen
6465
_CCCL_HOST_DEVICE ForwardIterator uninitialized_fill_n(
6566
const thrust::detail::execution_policy_base<DerivedPolicy>& exec, ForwardIterator first, Size n, const T& x)
6667
{
67-
_CCCL_NVTX_RANGE_SCOPE("uninitialized_fill_n");
68+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<DerivedPolicy>(), "uninitialized_fill_n");
6869
using thrust::system::detail::generic::uninitialized_fill_n;
6970
return uninitialized_fill_n(thrust::detail::derived_cast(thrust::detail::strip_const(exec)), first, n, x);
7071
} // end uninitialized_fill_n()
7172

7273
template <typename ForwardIterator, typename T>
7374
void uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x)
7475
{
75-
_CCCL_NVTX_RANGE_SCOPE("uninitialized_fill");
76-
using thrust::system::detail::generic::select_system;
77-
7876
using System = typename thrust::iterator_system<ForwardIterator>::type;
77+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "uninitialized_fill");
78+
using thrust::system::detail::generic::select_system;
7979

8080
System system;
8181

@@ -85,10 +85,9 @@ void uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& x)
8585
template <typename ForwardIterator, typename Size, typename T>
8686
ForwardIterator uninitialized_fill_n(ForwardIterator first, Size n, const T& x)
8787
{
88-
_CCCL_NVTX_RANGE_SCOPE("uninitialized_fill_n");
89-
using thrust::system::detail::generic::select_system;
90-
9188
using System = typename thrust::iterator_system<ForwardIterator>::type;
89+
_CCCL_NVTX_RANGE_SCOPE_IF(detail::should_enable_nvtx_for_policy<System>(), "uninitialized_fill_n");
90+
using thrust::system::detail::generic::select_system;
9291

9392
System system;
9493

0 commit comments

Comments
 (0)