Skip to content

Commit 9daa7fe

Browse files
Merge pull request #2457 from KhronosGroup/fix-2456-2301
GLSL: Implement SPV_KHR_cooperative_matrix.
2 parents 72b5b7c + de656c6 commit 9daa7fe

File tree

9 files changed

+802
-4
lines changed

9 files changed

+802
-4
lines changed
Lines changed: 97 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,97 @@
1+
#version 450
2+
// FP16 support: prefer GL_AMD_gpu_shader_half_float, fall back to
// GL_EXT_shader_explicit_arithmetic_types_float16, and fail compilation
// with an explicit error when neither extension is defined.
#if defined(GL_AMD_gpu_shader_half_float)
3+
#extension GL_AMD_gpu_shader_half_float : require
4+
#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
5+
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
6+
#else
7+
#error No extension available for FP16.
8+
#endif
9+
// Remaining extensions are required unconditionally.
#extension GL_EXT_shader_16bit_storage : require
10+
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
11+
#extension GL_KHR_cooperative_matrix : require
12+
// Fixed workgroup size of 64x1x1 invocations.
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
13+
14+
// Specialization constants (with their defaults) that parameterize every
// coopmat type and every coopMatLoad/coopMatStore call in this shader.
// NOTE(review): Scope = 3 is presumably the subgroup scope and Layout = 0
// presumably row-major -- confirm against the extension specification.
layout(constant_id = 6) const int Scope = 3;
15+
layout(constant_id = 0) const int Rows = 16;
16+
layout(constant_id = 1) const int Columns = 16;
17+
layout(constant_id = 5) const int Layout = 0;
18+
19+
// Storage buffer exposed as an unsized array of 32-bit floats.
layout(set = 0, binding = 0, std430) buffer SSBO32
20+
{
21+
float data[];
22+
} ssbo32;
23+
24+
// Storage buffer exposed as an unsized array of float16_t. It is declared at
// the same set/binding (0, 0) as SSBO32 above.
layout(set = 0, binding = 0, std430) buffer SSBO16
25+
{
26+
float16_t data[];
27+
} ssbo16;
28+
29+
// Workgroup-shared array used as a cooperative-matrix load/store target.
shared uint blah[512];
30+
31+
// Entry point: performs cooperative-matrix loads from both storage buffers,
// stores of scalar-constructed matrices, a per-element update loop, and a
// load/store round trip through the shared array.
void main()
{
    // Offsets derived from the workgroup index; each is reused by several calls.
    uint offsetX256 = 256u * gl_WorkGroupID.x;
    uint offsetX512 = 512u * gl_WorkGroupID.x;
    uint offsetX128 = 128u * gl_WorkGroupID.x;

    // float matrices: every use is loaded from ssbo32 (stride 16) and then
    // from ssbo16 (stride 32).
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> floatA_from32;
    coopMatLoad(floatA_from32, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> floatA_from16;
    coopMatLoad(floatA_from16, ssbo16.data, offsetX512, 32u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> floatB_from32;
    coopMatLoad(floatB_from32, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> floatB_from16;
    coopMatLoad(floatB_from16, ssbo16.data, offsetX512, 32u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> floatAcc_from32;
    coopMatLoad(floatAcc_from32, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> floatAcc_from16;
    coopMatLoad(floatAcc_from16, ssbo16.data, offsetX512, 32u, Layout);

    // float16_t matrices: every use is loaded from ssbo32 (stride 8) and then
    // from ssbo16 (stride 16).
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> halfA_from32;
    coopMatLoad(halfA_from32, ssbo32.data, offsetX128, 8u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> halfA_from16;
    coopMatLoad(halfA_from16, ssbo16.data, offsetX256, 16u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> halfB_from32;
    coopMatLoad(halfB_from32, ssbo32.data, offsetX128, 8u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> halfB_from16;
    coopMatLoad(halfB_from16, ssbo16.data, offsetX256, 16u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> halfAcc_from32;
    coopMatLoad(halfAcc_from32, ssbo32.data, offsetX128, 8u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> halfAcc_from16;
    coopMatLoad(halfAcc_from16, ssbo16.data, offsetX256, 16u, Layout);

    // Store one scalar-constructed matrix per component type, all with stride 0.
    coopMatStore(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0), ssbo32.data, offsetX128, 0u, Layout);
    coopMatStore(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(100u), ssbo32.data, offsetX128, 0u, Layout);
    coopMatStore(coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(-100), ssbo32.data, offsetX128, 0u, Layout);
    coopMatStore(coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA>(float16_t(100.0)), ssbo32.data, offsetX128, 0u, Layout);
    coopMatStore(coopmat<int16_t, Scope, Rows, Columns, gl_MatrixUseA>(-100s), ssbo32.data, offsetX128, 0u, Layout);
    coopMatStore(coopmat<uint16_t, Scope, Rows, Columns, gl_MatrixUseA>(100us), ssbo32.data, offsetX128, 0u, Layout);

    // Add 50 to every element of a matrix filled with 100, then store it.
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> bumped = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0);
    int elementCount = int(uint(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(0).length()));
    for (int idx = 0; idx < elementCount; idx++)
    {
        bumped[idx] = bumped[idx] + 50.0;
    }
    coopMatStore(bumped, ssbo32.data, 0u, 16u, Layout);

    // Matrix constructed from the workgroup index, stored at offset 0.
    coopMatStore(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(gl_WorkGroupID.x), ssbo32.data, 0u, 16u, Layout);

    // A/B/Accumulator loads from ssbo32 for float, uint and int matrices.
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> reloadFloatA;
    coopMatLoad(reloadFloatA, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> reloadFloatB;
    coopMatLoad(reloadFloatB, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> reloadFloatAcc;
    coopMatLoad(reloadFloatAcc, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> reloadUintA;
    coopMatLoad(reloadUintA, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseB> reloadUintB;
    coopMatLoad(reloadUintB, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseAccumulator> reloadUintAcc;
    coopMatLoad(reloadUintAcc, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> reloadIntA;
    coopMatLoad(reloadIntA, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseB> reloadIntB;
    coopMatLoad(reloadIntB, ssbo32.data, offsetX256, 16u, Layout);
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator> reloadIntAcc;
    coopMatLoad(reloadIntAcc, ssbo32.data, offsetX256, 16u, Layout);

    // Round trip through the shared array: load, then store back unchanged.
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> viaShared;
    coopMatLoad(viaShared, blah, 0u, 16u, Layout);
    coopMatStore(viaShared, blah, 0u, 16u, Layout);
}
97+
Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
#version 450
2+
// Cooperative matrix support is mandatory for this shader.
#extension GL_KHR_cooperative_matrix : require
3+
// Fixed workgroup size of 64x1x1 invocations.
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
4+
5+
// Specialization constants (with their defaults) that parameterize the coopmat
// type and the coopMatStore calls in this shader.
// NOTE(review): Scope = 3 is presumably the subgroup scope and Layout = 0
// presumably row-major -- confirm against the extension specification.
layout(constant_id = 6) const int Scope = 3;
6+
layout(constant_id = 0) const int Rows = 16;
7+
layout(constant_id = 1) const int Columns = 16;
8+
layout(constant_id = 5) const int Layout = 0;
9+
10+
// Storage buffer exposed as an unsized array of 32-bit floats.
layout(set = 0, binding = 0, std430) buffer SSBO32
11+
{
12+
float data[];
13+
} ssbo32;
14+
15+
// Entry point: builds a matrix filled with 50.0, makes a copy whose element 1
// is raised by a further 50.0, then stores the copy followed by the untouched
// original to the same location in ssbo32.
void main()
{
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> baseline = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(50.0);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> modified = baseline;

    // Read the element from the original so only the copy is mutated.
    float raisedElement = baseline[1] + 50.0;
    modified[1] = raisedElement;

    coopMatStore(modified, ssbo32.data, 0u, 16u, Layout);
    coopMatStore(baseline, ssbo32.data, 0u, 16u, Layout);
}
23+
Lines changed: 224 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,224 @@
1+
#version 450
2+
// FP16 support: prefer GL_AMD_gpu_shader_half_float, fall back to
// GL_EXT_shader_explicit_arithmetic_types_float16, and fail compilation
// with an explicit error when neither extension is defined.
#if defined(GL_AMD_gpu_shader_half_float)
3+
#extension GL_AMD_gpu_shader_half_float : require
4+
#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)
5+
#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require
6+
#else
7+
#error No extension available for FP16.
8+
#endif
9+
// Remaining extensions are required unconditionally.
#extension GL_EXT_shader_16bit_storage : require
10+
#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require
11+
#extension GL_KHR_cooperative_matrix : require
12+
// Fixed workgroup size of 64x1x1 invocations.
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
13+
14+
// Specialization constants (with their defaults) that parameterize every
// coopmat type and every coopMatLoad/coopMatStore call in this shader.
// NOTE(review): Scope = 3 is presumably the subgroup scope and Layout = 0
// presumably row-major -- confirm against the extension specification.
layout(constant_id = 6) const int Scope = 3;
15+
layout(constant_id = 0) const int Rows = 16;
16+
layout(constant_id = 1) const int Columns = 16;
17+
layout(constant_id = 5) const int Layout = 0;
18+
19+
// Storage buffer exposed as an unsized array of 32-bit floats.
layout(set = 0, binding = 0, std430) buffer SSBO32
20+
{
21+
float data[];
22+
} ssbo32;
23+
24+
// Storage buffer exposed as an unsized array of float16_t. It is declared at
// the same set/binding (0, 0) as SSBO32 above.
layout(set = 0, binding = 0, std430) buffer SSBO16
25+
{
26+
float16_t data[];
27+
} ssbo16;
28+
29+
// Workgroup-shared array used as a cooperative-matrix load/store target.
shared uint blah[512];
30+
31+
// Loads float cooperative matrices for each use (A, B, Accumulator): first
// from the float buffer (offset 256 * workgroup, stride 16), then from the
// float16_t buffer (offset 512 * workgroup, stride 32). The second load
// overwrites the first in the same variable.
void loads_32()
{
    uint offsetInFloatBuf = 256u * gl_WorkGroupID.x;
    uint offsetInHalfBuf = 512u * gl_WorkGroupID.x;

    // Use A.
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> matA;
    coopMatLoad(matA, ssbo32.data, offsetInFloatBuf, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> stagedA;
    coopMatLoad(stagedA, ssbo16.data, offsetInHalfBuf, 32u, Layout);
    matA = stagedA;

    // Use B.
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> matB;
    coopMatLoad(matB, ssbo32.data, offsetInFloatBuf, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> stagedB;
    coopMatLoad(stagedB, ssbo16.data, offsetInHalfBuf, 32u, Layout);
    matB = stagedB;

    // Use Accumulator.
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> matAcc;
    coopMatLoad(matAcc, ssbo32.data, offsetInFloatBuf, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> stagedAcc;
    coopMatLoad(stagedAcc, ssbo16.data, offsetInHalfBuf, 32u, Layout);
    matAcc = stagedAcc;
}
58+
59+
// Loads float16_t cooperative matrices for each use (A, B, Accumulator):
// first from the float buffer (offset 128 * workgroup, stride 8), then from
// the float16_t buffer (offset 256 * workgroup, stride 16). The second load
// overwrites the first in the same variable.
void loads_16()
{
    uint offsetInFloatBuf = 128u * gl_WorkGroupID.x;
    uint offsetInHalfBuf = 256u * gl_WorkGroupID.x;

    // Use A.
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> matA;
    coopMatLoad(matA, ssbo32.data, offsetInFloatBuf, 8u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> stagedA;
    coopMatLoad(stagedA, ssbo16.data, offsetInHalfBuf, 16u, Layout);
    matA = stagedA;

    // Use B.
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> matB;
    coopMatLoad(matB, ssbo32.data, offsetInFloatBuf, 8u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> stagedB;
    coopMatLoad(stagedB, ssbo16.data, offsetInHalfBuf, 16u, Layout);
    matB = stagedB;

    // Use Accumulator.
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> matAcc;
    coopMatLoad(matAcc, ssbo32.data, offsetInFloatBuf, 8u, Layout);
    coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> stagedAcc;
    coopMatLoad(stagedAcc, ssbo16.data, offsetInHalfBuf, 16u, Layout);
    matAcc = stagedAcc;
}
86+
87+
// Stores one scalar-constructed use-A matrix per component type (float, uint,
// int, float16_t, int16_t, uint16_t) into ssbo32 at offset 128 * workgroup
// with stride 0.
void stores()
{
    uint destOffset = 128u * gl_WorkGroupID.x;

    // 32-bit component types.
    coopMatStore(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0), ssbo32.data, destOffset, 0u, Layout);
    coopMatStore(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(100u), ssbo32.data, destOffset, 0u, Layout);
    coopMatStore(coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(-100), ssbo32.data, destOffset, 0u, Layout);

    // 16-bit component types.
    coopMatStore(coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA>(float16_t(100.0)), ssbo32.data, destOffset, 0u, Layout);
    coopMatStore(coopmat<int16_t, Scope, Rows, Columns, gl_MatrixUseA>(-100s), ssbo32.data, destOffset, 0u, Layout);
    coopMatStore(coopmat<uint16_t, Scope, Rows, Columns, gl_MatrixUseA>(100us), ssbo32.data, destOffset, 0u, Layout);
}
96+
97+
// Queries length() on a zero-constructed cooperative matrix for six component
// type / use combinations, overwriting the same local each time.
void len()
{
    int componentCount;

    // 32-bit float and 16-bit float.
    componentCount = int(uint(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(0).length()));
    componentCount = int(uint(coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB>(0).length()));

    // 32-bit signed and unsigned integers.
    componentCount = int(uint(coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator>(0).length()));
    componentCount = int(uint(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(0).length()));

    // 16-bit signed and unsigned integers.
    componentCount = int(uint(coopmat<int16_t, Scope, Rows, Columns, gl_MatrixUseB>(0).length()));
    componentCount = int(uint(coopmat<uint16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator>(0).length()));
}
106+
107+
// Converts a float matrix and then a uint matrix into an int matrix through
// the coopmat constructor.
void conversions()
{
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> floatSource = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0);
    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> uintSource = coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(100u);

    // float -> int, then uint -> int into the same destination.
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> asInt = coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(floatSource);
    asInt = coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(uintSource);
}
114+
115+
// Applies add, subtract, multiply, divide and multiply-by-scalar, in that
// order, to a float matrix and then to an int matrix.
void elementwise()
{
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> floatMat = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0);
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> intMat = coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(100);

    // Floating-point matrix.
    floatMat = floatMat + floatMat;
    floatMat = floatMat - floatMat;
    floatMat = floatMat * floatMat;
    floatMat = floatMat / floatMat;
    floatMat = floatMat * 100.0;

    // Integer matrix.
    intMat = intMat + intMat;
    intMat = intMat - intMat;
    intMat = intMat * intMat;
    intMat = intMat / intMat;
    intMat = intMat * 100;
}
130+
131+
// Adds 50.0 to every element of a matrix filled with 100.0, then stores the
// result into ssbo32 at offset 0 with stride 16.
void insert_extract()
{
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> values = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0);

    // The bound does not change while the loop runs, so evaluate it once.
    int elementCount = int(uint(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(0).length()));
    int idx = 0;
    while (idx < elementCount)
    {
        values[idx] = values[idx] + 50.0;
        idx++;
    }

    coopMatStore(values, ssbo32.data, 0u, 16u, Layout);
}
140+
141+
// Builds a uint matrix from the (non-constant) workgroup index and stores it
// into ssbo32 at offset 0 with stride 16.
void scalar_construct()
{
    coopMatStore(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(gl_WorkGroupID.x), ssbo32.data, 0u, 16u, Layout);
}
146+
147+
// Loads float A, B and Accumulator matrices from ssbo32 and runs three
// multiply-adds, feeding each result back in as the accumulator.
void matmul()
{
    uint srcOffset = 256u * gl_WorkGroupID.x;

    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> lhs;
    coopMatLoad(lhs, ssbo32.data, srcOffset, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> rhs;
    coopMatLoad(rhs, ssbo32.data, srcOffset, 16u, Layout);
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> acc;
    coopMatLoad(acc, ssbo32.data, srcOffset, 16u, Layout);

    // Two multiply-adds with operand flags 0, then one with flags 16.
    // NOTE(review): 16 is presumably the saturating-accumulation operand bit;
    // confirm against the extension specification.
    acc = coopMatMulAdd(lhs, rhs, acc, 0);
    acc = coopMatMulAdd(lhs, rhs, acc, 0);
    acc = coopMatMulAdd(lhs, rhs, acc, 16);
}
165+
166+
// Loads uint A, B and Accumulator matrices from ssbo32 and performs a single
// multiply-add with operand flags 31.
void matmul_uint()
{
    uint srcOffset = 256u * gl_WorkGroupID.x;

    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> lhs;
    coopMatLoad(lhs, ssbo32.data, srcOffset, 16u, Layout);
    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseB> rhs;
    coopMatLoad(rhs, ssbo32.data, srcOffset, 16u, Layout);
    coopmat<uint, Scope, Rows, Columns, gl_MatrixUseAccumulator> acc;
    coopMatLoad(acc, ssbo32.data, srcOffset, 16u, Layout);

    // NOTE(review): 31 presumably sets every matrix-operand bit (signedness
    // flags plus saturation); confirm against the extension specification.
    acc = coopMatMulAdd(lhs, rhs, acc, 31);
}
182+
183+
// Loads int A, B and Accumulator matrices from ssbo32 and performs a single
// multiply-add with operand flags 31.
void matmul_int()
{
    uint srcOffset = 256u * gl_WorkGroupID.x;

    coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> lhs;
    coopMatLoad(lhs, ssbo32.data, srcOffset, 16u, Layout);
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseB> rhs;
    coopMatLoad(rhs, ssbo32.data, srcOffset, 16u, Layout);
    coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator> acc;
    coopMatLoad(acc, ssbo32.data, srcOffset, 16u, Layout);

    // NOTE(review): 31 presumably sets every matrix-operand bit (signedness
    // flags plus saturation); confirm against the extension specification.
    acc = coopMatMulAdd(lhs, rhs, acc, 31);
}
199+
200+
// Round-trips a float matrix through the shared array `blah`: load from
// offset 0 with stride 16, then store back to the same place.
void lds()
{
    coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> fromShared;
    coopMatLoad(fromShared, blah, 0u, 16u, Layout);
    coopMatStore(fromShared, blah, 0u, 16u, Layout);
}
208+
209+
// Entry point: invokes every test routine exactly once, in a fixed order.
void main()
{
    // Buffer loads into float and float16_t matrices.
    loads_32();
    loads_16();

    // Stores and length queries.
    stores();
    len();

    // Conversions, arithmetic and per-element access.
    conversions();
    elementwise();
    insert_extract();
    scalar_construct();

    // Multiply-add for float, uint and int matrices.
    matmul();
    matmul_uint();
    matmul_int();

    // Shared-memory round trip.
    lds();
}
224+

0 commit comments

Comments
 (0)