Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions c/parallel/src/jit_templates/mappings/iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@
template <typename ValueTp>
struct cccl_iterator_t_mapping
{
bool is_pointer = false;
int size = 1;
int alignment = 1;
void (*advance)(void*, cuda::std::uint64_t) = nullptr;
void (*dereference)(const void*, ValueTp*) = nullptr;
void (*assign)(const void*, ValueTp);
bool is_pointer = false;
int size = 1;
int alignment = 1;
void (*advance)(void*, const void*) = nullptr;
void (*dereference)(const void*, ValueTp*) = nullptr;
void (*assign)(const void*, const void*);

using ValueT = ValueTp;
};
Expand Down Expand Up @@ -68,22 +68,19 @@ struct parameter_mapping<cccl_iterator_t>
{
return std::format(
R"output(
extern "C" __device__ void {0}(void *, {1});
extern "C" __device__ void {2}(const void *, {3});
extern "C" __device__ void {0}(void *, const void*);
extern "C" __device__ void {1}(const void *, const void*);
)output",
arg.advance.name,
cccl_type_enum_to_name(cccl_type_enum::CCCL_UINT64),
arg.dereference.name,
cccl_type_enum_to_name(arg.value_type.type));
arg.dereference.name);
}

return std::format(
R"input(
extern "C" __device__ void {0}(void *, {1});
extern "C" __device__ void {2}(const void *, {3}*);
extern "C" __device__ void {0}(void *, const void*);
extern "C" __device__ void {1}(const void *, {2}*);
)input",
arg.advance.name,
cccl_type_enum_to_name(cccl_type_enum::CCCL_UINT64),
arg.dereference.name,
cccl_type_enum_to_name(arg.value_type.type));
}
Expand Down
2 changes: 1 addition & 1 deletion c/parallel/src/jit_templates/templates/input_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ struct input_iterator_t

__device__ input_iterator_t& operator+=(difference_type diff)
{
Iterator.advance(&state, diff);
Iterator.advance(&state, &diff);
return *this;
}

Expand Down
4 changes: 2 additions & 2 deletions c/parallel/src/jit_templates/templates/output_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ struct output_iterator_proxy_t
{
__device__ output_iterator_proxy_t& operator=(AssignT x)
{
AssignF(&state, cuda::std::move(x));
AssignF(&state, &x);
return *this;
}

Expand All @@ -59,7 +59,7 @@ struct output_iterator_t

__device__ output_iterator_t& operator+=(difference_type diff)
{
Iterator.advance(&state, diff);
Iterator.advance(&state, &diff);
return *this;
}

Expand Down
12 changes: 6 additions & 6 deletions c/parallel/src/kernels/iterators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ std::string make_kernel_input_iterator(
{
const std::string iter_def = std::format(R"XXX(
extern "C" __device__ void DEREF(const void *self_ptr, VALUE_T* result);
extern "C" __device__ void ADVANCE(void *self_ptr, DIFF_T offset);
extern "C" __device__ void ADVANCE(void *self_ptr, const void* offset);
struct __align__(OP_ALIGNMENT) {0} {{
using iterator_category = cuda::std::random_access_iterator_tag;
using value_type = VALUE_T;
Expand All @@ -58,7 +58,7 @@ struct __align__(OP_ALIGNMENT) {0} {{
return result;
}}
__device__ inline {0}& operator+=(difference_type diff) {{
ADVANCE(data, diff);
ADVANCE(data, &diff);
return *this;
}}
__device__ inline value_type operator[](difference_type diff) const {{
Expand Down Expand Up @@ -99,14 +99,14 @@ std::string make_kernel_output_iterator(
std::string_view advance)
{
const std::string iter_def = std::format(R"XXX(
extern "C" __device__ void DEREF(const void *self_ptr, VALUE_T x);
extern "C" __device__ void ADVANCE(void *self_ptr, DIFF_T offset);
extern "C" __device__ void DEREF(const void *self_ptr, const void* x);
extern "C" __device__ void ADVANCE(void *self_ptr, const void* offset);
struct __align__(OP_ALIGNMENT) {0}_state_t {{
char data[OP_SIZE];
}};
struct {0}_proxy_t {{
__device__ {0}_proxy_t operator=(VALUE_T x) {{
DEREF(&state, x);
DEREF(&state, &x);
return *this;
}}
{0}_state_t state;
Expand All @@ -119,7 +119,7 @@ struct {0} {{
using reference = {0}_proxy_t;
__device__ {0}_proxy_t operator*() const {{ return {{state}}; }}
__device__ {0}& operator+=(difference_type diff) {{
ADVANCE(&state, diff);
ADVANCE(&state, &diff);
return *this;
}}
__device__ {0}_proxy_t operator[](difference_type diff) const {{
Expand Down
81 changes: 43 additions & 38 deletions c/parallel/test/test_merge_sort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,16 +244,15 @@ struct item_pair
struct DeviceMergeSort_SortPairsCopy_CustomType_Fixture_Tag;
C2H_TEST("DeviceMergeSort:SortPairsCopy works with custom types", "[merge_sort]")
{
const size_t num_items = GENERATE_COPY(take(2, random(1, 100000)), values({5, 10000, 100000}));
operation_t op = make_operation(
"op",
"struct key_pair { short a; size_t b; };\n"
"extern \"C\" __device__ void op(void* lhs_ptr, void* rhs_ptr, bool* out_ptr) {\n"
" key_pair* lhs = static_cast<key_pair*>(lhs_ptr);\n"
" key_pair* rhs = static_cast<key_pair*>(rhs_ptr);\n"
" bool* out = static_cast<bool*>(out_ptr);\n"
" *out = lhs->a == rhs->a ? lhs->b < rhs->b : lhs->a < rhs->a;\n"
"}");
const size_t num_items = GENERATE_COPY(take(2, random(1, 100000)), values({5, 10000, 100000}));
operation_t op = make_operation("op",
R"(struct key_pair { short a; size_t b; };
extern "C" __device__ void op(void* lhs_ptr, void* rhs_ptr, bool* out_ptr) {
key_pair* lhs = static_cast<key_pair*>(lhs_ptr);
key_pair* rhs = static_cast<key_pair*>(rhs_ptr);
bool* out = static_cast<bool*>(out_ptr);
*out = lhs->a == rhs->a ? lhs->b < rhs->b : lhs->a < rhs->a;
})");
const std::vector<short> a = generate<short>(num_items);
const std::vector<size_t> b = generate<size_t>(num_items);
std::vector<key_pair> input_keys(num_items);
Expand Down Expand Up @@ -301,16 +300,15 @@ C2H_TEST("DeviceMergeSort:SortPairsCopy works with custom types", "[merge_sort]"
struct DeviceMergeSort_SortPairsCopy_CustomType_WellKnown_Fixture_Tag;
C2H_TEST("DeviceMergeSort:SortPairsCopy works with custom types with well-known predicates", "[merge_sort][well_known]")
{
const size_t num_items = GENERATE_COPY(take(2, random(1, 100000)), values({5, 10000, 100000}));
operation_t op_state = make_operation(
"op",
"struct key_pair { short a; size_t b; };\n"
"extern \"C\" __device__ void op(void* lhs_ptr, void* rhs_ptr, bool* out_ptr) {\n"
" key_pair* lhs = static_cast<key_pair*>(lhs_ptr);\n"
" key_pair* rhs = static_cast<key_pair*>(rhs_ptr);\n"
" bool* out = static_cast<bool*>(out_ptr);\n"
" *out = lhs->a == rhs->a ? lhs->b < rhs->b : lhs->a < rhs->a;\n"
"}");
const size_t num_items = GENERATE_COPY(take(2, random(1, 100000)), values({5, 10000, 100000}));
operation_t op_state = make_operation("op",
R"(struct key_pair { short a; size_t b; };
extern "C" __device__ void op(void* lhs_ptr, void* rhs_ptr, bool* out_ptr) {
key_pair* lhs = static_cast<key_pair*>(lhs_ptr);
key_pair* rhs = static_cast<key_pair*>(rhs_ptr);
bool* out = static_cast<bool*>(out_ptr);
*out = lhs->a == rhs->a ? lhs->b < rhs->b : lhs->a < rhs->a;
})");
cccl_op_t op = op_state;
op.type = cccl_op_kind_t::CCCL_LESS;
const std::vector<short> a = generate<short>(num_items);
Expand Down Expand Up @@ -432,13 +430,17 @@ C2H_TEST("DeviceMergeSort::SortKeys works with output iterators", "[merge_sort]"
make_iterator<TestType, random_access_iterator_state_t>(
{"random_access_iterator_state_t", "struct random_access_iterator_state_t { int* d_input; };\n"},
{"advance",
"extern \"C\" __device__ void advance(random_access_iterator_state_t* state, unsigned long long offset) {\n"
" state->d_input += offset;\n"
"}"},
R"(extern "C" __device__ void advance(void* state, const void* offset) {
auto* typed_state = static_cast<random_access_iterator_state_t*>(state);
auto offset_val = *static_cast<const unsigned long long*>(offset);
typed_state->d_input += offset_val;
})"},
{"dereference",
"extern \"C\" __device__ void dereference(random_access_iterator_state_t* state, int x) {\n"
" *state->d_input = x;\n"
"}"});
R"(extern "C" __device__ void dereference(void* state, const void* x) {
auto* typed_state = static_cast<random_access_iterator_state_t*>(state);
auto x_val = *static_cast<const int*>(x);
*typed_state->d_input = x_val;
})"});
std::vector<TestType> input_keys = make_shuffled_key_ranks_vector<TestType>(num_items);
std::vector<TestType> expected_keys = input_keys;

Expand Down Expand Up @@ -471,14 +473,17 @@ C2H_TEST("DeviceMergeSort::SortPairs works with output iterators for items", "[m
make_iterator<TestType, item_random_access_iterator_state_t>(
"struct item_random_access_iterator_state_t { int* d_input; };\n",
{"advance",
"extern \"C\" __device__ void advance(item_random_access_iterator_state_t* state, unsigned long long offset) "
"{\n"
" state->d_input += offset;\n"
"}"},
R"(extern "C" __device__ void advance(void* state, const void* offset) {
auto* typed_state = static_cast<item_random_access_iterator_state_t*>(state);
auto offset_val = *static_cast<const unsigned long long*>(offset);
typed_state->d_input += offset_val;
})"},
{"dereference",
"extern \"C\" __device__ void dereference(item_random_access_iterator_state_t* state, int x) {\n"
" *state->d_input = x;\n"
"}"});
R"(extern "C" __device__ void dereference(void* state, const void* x) {
auto* typed_state = static_cast<item_random_access_iterator_state_t*>(state);
auto x_val = *static_cast<const int*>(x);
*typed_state->d_input = x_val;
})"});

pointer_t<TestType> input_keys_it(input_keys);
pointer_t<item_t> input_items_it(input_items);
Expand Down Expand Up @@ -650,12 +655,12 @@ C2H_TEST("MergeSort works with C++ source operations using custom headers", "[me
/* C2H_TEST("DeviceMergeSort:SortPairsCopy fails to build for large types due to no vsmem", "[merge_sort]")
{
const size_t num_items = 1;
operation_t op = make_operation(
operation_t op = make_operation(
"op",
"struct large_key_pair { int a; char c[100]; };\n"
"extern \"C\" __device__ bool op(large_key_pair lhs, large_key_pair rhs) {\n"
" return lhs.a < rhs.a;\n"
"}");
R"(struct large_key_pair { int a; char c[100]; };
extern "C" __device__ bool op(large_key_pair lhs, large_key_pair rhs) {
return lhs.a < rhs.a;
})");
const std::vector<int> a = generate<int>(num_items);
std::vector<large_key_pair> input_keys(num_items);
for (std::size_t i = 0; i < num_items; ++i)
Expand Down
50 changes: 24 additions & 26 deletions c/parallel/test/test_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,15 +180,14 @@ C2H_TEST("Reduce works with custom types", "[reduce]")
{
const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 24)));

operation_t op = make_operation(
"op",
"struct pair { short a; size_t b; };\n"
"extern \"C\" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {\n"
" pair* lhs = static_cast<pair*>(lhs_ptr);\n"
" pair* rhs = static_cast<pair*>(rhs_ptr);\n"
" pair* out = static_cast<pair*>(out_ptr);\n"
" *out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };\n"
"}");
operation_t op = make_operation("op",
R"(struct pair { short a; size_t b; };
extern "C" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {
pair* lhs = static_cast<pair*>(lhs_ptr);
pair* rhs = static_cast<pair*>(rhs_ptr);
pair* out = static_cast<pair*>(out_ptr);
*out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };
})");
const std::vector<short> a = generate<short>(num_items);
const std::vector<size_t> b = generate<size_t>(num_items);
std::vector<pair> input(num_items);
Expand Down Expand Up @@ -218,15 +217,14 @@ C2H_TEST("Reduce works with custom types with well-known operations", "[reduce][
{
const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 24)));

operation_t op_state = make_operation(
"op",
"struct pair { short a; size_t b; };\n"
"extern \"C\" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {\n"
" pair* lhs = static_cast<pair*>(lhs_ptr);\n"
" pair* rhs = static_cast<pair*>(rhs_ptr);\n"
" pair* out = static_cast<pair*>(out_ptr);\n"
" *out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };\n"
"}");
operation_t op_state = make_operation("op",
R"(struct pair { short a; size_t b; };
extern "C" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {
pair* lhs = static_cast<pair*>(lhs_ptr);
pair* rhs = static_cast<pair*>(rhs_ptr);
pair* out = static_cast<pair*>(out_ptr);
*out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };
})");
cccl_op_t op = op_state;
op.type = cccl_op_kind_t::CCCL_PLUS;
const std::vector<short> a = generate<short>(num_items);
Expand Down Expand Up @@ -371,14 +369,14 @@ C2H_TEST("Reduce works with stateful operators", "[reduce]")
pointer_t<int> counter(1);
stateful_operation_t<invocation_counter_state_t> op = make_operation(
"op",
"struct invocation_counter_state_t { int* d_counter; };\n"
"extern \"C\" __device__ void op(void* state_ptr, void* a_ptr, void* b_ptr, void* out_ptr) {\n"
" invocation_counter_state_t* state = static_cast<invocation_counter_state_t*>(state_ptr);\n"
" atomicAdd(state->d_counter, 1);\n"
" int a = *static_cast<int*>(a_ptr);\n"
" int b = *static_cast<int*>(b_ptr);\n"
" *static_cast<int*>(out_ptr) = a + b;\n"
"}",
R"(struct invocation_counter_state_t { int* d_counter; };
extern "C" __device__ void op(void* state_ptr, void* a_ptr, void* b_ptr, void* out_ptr) {
invocation_counter_state_t* state = static_cast<invocation_counter_state_t*>(state_ptr);
atomicAdd(state->d_counter, 1);
int a = *static_cast<int*>(a_ptr);
int b = *static_cast<int*>(b_ptr);
*static_cast<int*>(out_ptr) = a + b;
})",
invocation_counter_state_t{counter.ptr});

const std::vector<int> input = generate<int>(num_items);
Expand Down
34 changes: 16 additions & 18 deletions c/parallel/test/test_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -398,15 +398,14 @@ C2H_TEST("Scan works with custom types", "[scan]")
{
const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 24)));

operation_t op = make_operation(
"op",
"struct pair { short a; size_t b; };\n"
"extern \"C\" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {\n"
" pair* lhs = static_cast<pair*>(lhs_ptr);\n"
" pair* rhs = static_cast<pair*>(rhs_ptr);\n"
" pair* out = static_cast<pair*>(out_ptr);\n"
" *out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };\n"
"}");
operation_t op = make_operation("op",
R"(struct pair { short a; size_t b; };
extern "C" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {
pair* lhs = static_cast<pair*>(lhs_ptr);
pair* rhs = static_cast<pair*>(rhs_ptr);
pair* out = static_cast<pair*>(out_ptr);
*out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };
})");
const std::vector<short> a = generate<short>(num_items);
const std::vector<size_t> b = generate<size_t>(num_items);
std::vector<pair> input(num_items);
Expand Down Expand Up @@ -439,15 +438,14 @@ C2H_TEST("Scan works with custom types with well-known operations", "[scan][well
{
const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 24)));

operation_t op_state = make_operation(
"op",
"struct pair { short a; size_t b; };\n"
"extern \"C\" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {\n"
" pair* lhs = static_cast<pair*>(lhs_ptr);\n"
" pair* rhs = static_cast<pair*>(rhs_ptr);\n"
" pair* out = static_cast<pair*>(out_ptr);\n"
" *out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };\n"
"}");
operation_t op_state = make_operation("op",
R"(struct pair { short a; size_t b; };
extern "C" __device__ void op(void* lhs_ptr, void* rhs_ptr, void* out_ptr) {
pair* lhs = static_cast<pair*>(lhs_ptr);
pair* rhs = static_cast<pair*>(rhs_ptr);
pair* out = static_cast<pair*>(out_ptr);
*out = pair{ lhs->a + rhs->a, lhs->b + rhs->b };
})");
cccl_op_t op = op_state;
op.type = cccl_op_kind_t::CCCL_PLUS;
const std::vector<short> a = generate<short>(num_items);
Expand Down
15 changes: 9 additions & 6 deletions c/parallel/test/test_segmented_reduce.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,20 +443,23 @@ struct {0} {{
/* 2 */ index_type_name);

static constexpr std::string_view it_advance_fn_def_src_tmpl = R"XXX(
extern "C" __device__ void {0}({1}* state, {2} offset)
extern "C" __device__ void {0}(void* state, const void* offset)
{{
state->linear_id += offset;
auto* typed_state = static_cast<{1}*>(state);
auto offset_val = *static_cast<const {2}*>(offset);
typed_state->linear_id += offset_val;
}}
)XXX";

const std::string it_advance_fn_def_src =
std::format(it_advance_fn_def_src_tmpl, /*0*/ advance_fn_name, state_name, index_type_name);

static constexpr std::string_view it_dereference_fn_src_tmpl = R"XXX(
extern "C" __device__ void {0}({2} *state, {1}* result) {{
unsigned long long col_id = (state->linear_id) / (state->n_rows);
unsigned long long row_id = (state->linear_id) - col_id * (state->n_rows);
*result = *(state->ptr + row_id * (state->n_cols) + col_id);
extern "C" __device__ void {0}(const void* state, {1}* result) {{
auto* typed_state = static_cast<const {2}*>(state);
unsigned long long col_id = (typed_state->linear_id) / (typed_state->n_rows);
unsigned long long row_id = (typed_state->linear_id) - col_id * (typed_state->n_rows);
*result = *(typed_state->ptr + row_id * (typed_state->n_cols) + col_id);
}}
)XXX";

Expand Down
Loading