77#error No extension available for FP16.
88#endif
99#extension GL_EXT_shader_16bit_storage : require
10+ #extension GL_EXT_buffer_reference2 : require
1011#extension GL_KHR_cooperative_matrix : require
1112#extension GL_KHR_memory_scope_semantics : require
1213#extension GL_NV_cooperative_matrix2 : require
1314#extension GL_EXT_float_e4m3 : require
1415layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
1516
// Forward declaration of the buffer-reference block type; its body follows below.
17+ layout(buffer_reference) buffer A_buffer_ref;
18+
// Specialization constant (constant_id 0) with a default of 0.
// tensor_layouts() passes it as the clamp-mode argument of a tensor layout.
19+ layout(constant_id = 0) const uint Clamp = 0u;
20+
// Body of the buffer-reference block: std430 layout, a single runtime-sized
// float16_t array. decodeLoad() takes a value of this type as its first parameter.
21+ layout(buffer_reference, std430) buffer A_buffer_ref
22+ {
23+ float16_t data_a[];
24+ };
25+
// Storage buffer bound at set 0 / binding 0 with the same single member,
// accessed through the instance name _157 (used by load_stores()).
26+ layout(set = 0, binding = 0, std430) buffer A_buffer
27+ {
28+ float16_t data_a[];
29+ } _157;
30+
1631void accum_to_a_cast()
1732{
1833 coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> Accum = coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator>(float16_t(0.0));
@@ -34,9 +49,99 @@ void value_cast()
// Converts a 16x16 float16_t accumulator matrix (constructed from 0.0) into a
// 16x16 floate4m3_t use-B matrix by calling saturatedConvertEXT.
// In this diff view the '-' lines and the '+' lines are the old and new versions
// of the same three statements; they differ only in the temporary's name
// (_52 became _107).
3449void saturated_cast()
3550{
3651 coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> Accum = coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator>(float16_t(0.0));
37- coopmat<floate4m3_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseB> _52;
38- saturatedConvertEXT(_52, Accum);
39- coopmat<floate4m3_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseB> B = _52;
// The conversion writes into a temporary first; the result is then copied into B.
52+ coopmat<floate4m3_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseB> _107;
53+ saturatedConvertEXT(_107, Accum);
54+ coopmat<floate4m3_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseB> B = _107;
55+ }
56+
// Exercises the tensor-layout creation and setter built-ins. All results stay
// in locals; nothing is read from or written to a buffer here.
57+ void tensor_layouts()
58+ {
// layout1: 2-dimensional layout with the built-in constant clamp mode.
59+ tensorLayoutNV<2, gl_CooperativeMatrixClampModeConstantNV> layout1 = createTensorLayoutNV(2u, gl_CooperativeMatrixClampModeConstantNV);
// layout2: same dimensionality, but the clamp mode comes from the Clamp specialization constant.
60+ tensorLayoutNV<2, (Clamp)> layout2 = createTensorLayoutNV(2u, (Clamp));
// layout3: result of setting a clamp value of 42 on layout1.
61+ tensorLayoutNV<2, gl_CooperativeMatrixClampModeConstantNV> layout3 = setTensorLayoutClampValueNV(layout1, 42u);
// NOTE(review): the block-size setter appears twice and the dimension setter three
// times with identical arguments. This looks like machine-generated shader text,
// so the repetition is presumably intentional — confirm before deduplicating.
62+ layout1 = setTensorLayoutBlockSizeNV(layout1, 1u, 16u);
63+ layout1 = setTensorLayoutBlockSizeNV(layout1, 1u, 16u);
64+ layout1 = setTensorLayoutDimensionNV(layout1, 128u, 128u);
65+ layout1 = setTensorLayoutDimensionNV(layout1, 128u, 128u);
66+ layout1 = setTensorLayoutDimensionNV(layout1, 128u, 128u);
67+ layout1 = setTensorLayoutStrideNV(layout1, 1u, 1u);
68+ }
69+
// Decode callback handed to coopMatLoadTensorNV in load_stores().
// Always returns element 0 of buf.data_a; blockCoord and coordInBlock are not used.
70+ float16_t decodeLoad(const in A_buffer_ref buf, const in uint blockCoord[2], const in uint coordInBlock[2])
71+ {
72+ return buf.data_a[0];
73+ }
74+
// Exercises the tensor load/store built-ins against the _157.data_a buffer.
// Five coopMatLoadTensorNV calls with different argument combinations are issued,
// followed by two coopMatStoreTensorNV calls. Each load writes into a fresh
// temporary that is then copied into A.
75+ void load_stores()
76+ {
// Offset argument passed unchanged to every load and store below.
77+ uint offset = 17u;
78+ tensorLayoutNV<2, gl_CooperativeMatrixClampModeConstantNV> layout1 = createTensorLayoutNV(2u, gl_CooperativeMatrixClampModeConstantNV);
79+ tensorViewNV<2u, false, 0u, 1u> view = createTensorViewNV(2u, false, 0u, 1u);
80+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseA> A;
// Load 1: layout only.
81+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseA> _163;
82+ coopMatLoadTensorNV(_163, _157.data_a, offset, layout1);
83+ A = _163;
// Load 2: layout produced by sliceTensorLayoutNV(layout1, 16u, 16u, 0u, 16u).
84+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseA> _169;
85+ coopMatLoadTensorNV(_169, _157.data_a, offset, sliceTensorLayoutNV(layout1, 16u, 16u, 0u, 16u));
86+ A = _169;
// Load 3: layout plus tensor view.
87+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseA> _175;
88+ coopMatLoadTensorNV(_175, _157.data_a, offset, layout1, view);
89+ A = _175;
// Load 4: layout plus the decodeLoad callback.
90+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseA> _180;
91+ coopMatLoadTensorNV(_180, _157.data_a, offset, layout1, decodeLoad);
92+ A = _180;
// Load 5: layout, tensor view and the decodeLoad callback together.
93+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseA> _186;
94+ coopMatLoadTensorNV(_186, _157.data_a, offset, layout1, view, decodeLoad);
95+ A = _186;
// Stores: both use the sliced layout; the second additionally passes the view.
96+ coopMatStoreTensorNV(A, _157.data_a, offset, sliceTensorLayoutNV(layout1, 16u, 16u, 0u, 16u));
97+ coopMatStoreTensorNV(A, _157.data_a, offset, sliceTensorLayoutNV(layout1, 16u, 16u, 0u, 16u), view);
98+ }
99+
// Combine callback for coopMatReduceNV: returns max(x, y).
// Used directly by callback_functions() and indirectly through maxReduceIndirect().
100+ float16_t maxReduce(const in float16_t x, const in float16_t y)
101+ {
102+ return max(x, y);
103+ }
104+
// Thin wrapper that forwards both arguments to maxReduce() and returns its result.
// callback_functions() passes it as the combine callback of the 2x2 reduction.
105+ float16_t maxReduceIndirect(const in float16_t x, const in float16_t y)
106+ {
107+ return maxReduce(x, y);
108+ }
109+
// Per-element callback for coopMatPerElementNV: returns exp(elem).
// The row and col parameters are accepted but not used.
110+ float16_t Exp(const in uint row, const in uint col, const in float16_t elem)
111+ {
112+ return exp(elem);
113+ }
114+
// Per-element callback taking one extra argument: returns exp(elem) when
// `maybe` is true and elem unchanged otherwise. row and col are not used.
// callback_functions() invokes it through coopMatPerElementNV with maybe = true.
115+ float16_t ExpWithArg(const in uint row, const in uint col, const in float16_t elem, const in bool maybe)
116+ {
117+ if (maybe)
118+ {
119+ return exp(elem);
120+ }
121+ else
122+ {
123+ return elem;
124+ }
125+ }
126+
// Exercises the built-ins that take a user function as an argument:
// coopMatReduceNV (three reduce modes) and coopMatPerElementNV (with and
// without an extra forwarded argument). Each call writes to a temporary that is
// then copied into a named matrix.
127+ void callback_functions()
128+ {
// NOTE(review): A is declared without an initializer before it is first read below.
129+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> A;
// Row reduction of A with maxReduce.
130+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> _201;
131+ coopMatReduceNV(_201, A, gl_CooperativeMatrixReduceRowNV, maxReduce);
132+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> reduced = _201;
// Row-and-column reduction applied to the previous result.
133+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> _204;
134+ coopMatReduceNV(_204, reduced, gl_CooperativeMatrixReduceRowAndColumnNV, maxReduce);
135+ reduced = _204;
// 2x2 reduction: the 16x16 input A yields an 8x8 result; uses maxReduceIndirect.
136+ coopmat<float16_t, gl_ScopeSubgroup, 8u, 8u, gl_MatrixUseAccumulator> _211;
137+ coopMatReduceNV(_211, A, gl_CooperativeMatrixReduce2x2NV, maxReduceIndirect);
138+ coopmat<float16_t, gl_ScopeSubgroup, 8u, 8u, gl_MatrixUseAccumulator> B = _211;
// Per-element pass with Exp; the result replaces A.
139+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> _213;
140+ coopMatPerElementNV(_213, A, Exp);
141+ A = _213;
// Per-element pass with ExpWithArg; the trailing `true` is supplied as its `maybe` parameter.
142+ coopmat<float16_t, gl_ScopeSubgroup, 16u, 16u, gl_MatrixUseAccumulator> _216;
143+ coopMatPerElementNV(_216, A, ExpWithArg, true);
144+ A = _216;
40145}
41146
42147void main()
@@ -45,5 +150,8 @@ void main()
45150 accum_to_b_cast();
46151 value_cast();
47152 saturated_cast();
153+ tensor_layouts();
154+ load_stores();
155+ callback_functions();
48156}
49157
0 commit comments