-
Notifications
You must be signed in to change notification settings - Fork 78
Open
Description
The following kernel:
knl = lp.make_kernel(
"{[i, j]: 0<=i<100 and 0<=j<4}",
"""
for i
for j
<> tmp1[j] = i+j
<> tmp2[j] = 0
if j
tmp2[j] = 2 * tmp1[j]
end
out[i, j] = 2*tmp2[j]
end
end
""", seq_dependencies=True)
knl = lp.tag_array_axes(knl, "tmp1,tmp2", "vec")
knl = lp.tag_inames(knl, "j:vec")
print(lp.generate_code_v2(knl).device_code())
generates:
__kernel void __attribute__ ((reqd_work_group_size(1, 1, 1))) loopy_kernel(__global int *__restrict__ out)
{
int4 tmp1;
int4 tmp2;
for (int i = 0; i <= 99; ++i)
{
tmp1.s0 = i;
tmp1.s1 = i + 1;
tmp1.s2 = i + 2;
tmp1.s3 = i + 3;
tmp2 = 0;
if (j)
tmp2 = 2 * tmp1;
out[4 * i] = 2 * tmp2.s0;
out[1 + 4 * i] = 2 * tmp2.s1;
out[2 + 4 * i] = 2 * tmp2.s2;
out[3 + 4 * i] = 2 * tmp2.s3;
}
}
Notice the stray `(j)` in the conditional: it references the vectorized iname `j`, which is never declared in the generated code. A short-term solution, which is to not vectorize such instructions, will be included as part of #557.
/cc @sv2518
Metadata
Metadata
Assignees
Labels
No labels