|
| 1 | +#version 450 |
| 2 | +#if defined(GL_AMD_gpu_shader_half_float) /* FP16 support: require the AMD half-float extension if it is defined, otherwise the EXT explicit float16 extension; if neither is defined, compilation stops at the #error below. */ |
| 3 | +#extension GL_AMD_gpu_shader_half_float : require |
| 4 | +#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16) |
| 5 | +#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require |
| 6 | +#else |
| 7 | +#error No extension available for FP16. |
| 8 | +#endif |
| 9 | +#extension GL_EXT_shader_16bit_storage : require |
| 10 | +#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require |
| 11 | +#extension GL_KHR_cooperative_matrix : require |
| 12 | +layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in; /* Compute workgroup size: 64 x 1 x 1 invocations. */ |
| 13 | + |
| 14 | +layout(constant_id = 6) const int Scope = 3; /* Specialization constant (id 6, default 3): passed as the scope parameter of every coopmat type in this file. NOTE(review): 3 presumably corresponds to gl_ScopeSubgroup; confirm against the extension spec. */ |
| 15 | +layout(constant_id = 0) const int Rows = 16; /* Specialization constant (id 0, default 16): row-count parameter of every coopmat type below. */ |
| 16 | +layout(constant_id = 1) const int Columns = 16; /* Specialization constant (id 1, default 16): column-count parameter of every coopmat type below. */ |
| 17 | +layout(constant_id = 5) const int Layout = 0; /* Specialization constant (id 5, default 0): passed as the last argument of every coopMatLoad / coopMatStore call. NOTE(review): 0 presumably means row-major layout; confirm against the extension spec. */ |
| 18 | + |
| 19 | +layout(set = 0, binding = 0, std430) buffer SSBO32 /* Storage block exposing a runtime-sized float array as ssbo32.data. */ |
| 20 | +{ |
| 21 | + float data[]; |
| 22 | +} ssbo32; |
| 23 | + |
| 24 | +layout(set = 0, binding = 0, std430) buffer SSBO16 /* Storage block exposing a runtime-sized float16_t array as ssbo16.data. NOTE(review): declared at the same set and binding as SSBO32, so both blocks presumably view the same buffer; confirm this aliasing is intended. */ |
| 25 | +{ |
| 26 | + float16_t data[]; |
| 27 | +} ssbo16; |
| 28 | + |
| 29 | +shared uint blah[512]; /* Workgroup-shared array of 512 uints; lds() uses it as both the source and the destination of a cooperative-matrix load and store. */ |
| 30 | + |
| 31 | +void loads_32() /* Loads float cooperative matrices for each of the three uses (A, B, Accumulator), once from ssbo32.data and once from ssbo16.data. The loaded matrices are only copied into locals and are not read afterwards. */ |
| 32 | +{ |
| 33 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> _60; |
| 34 | + coopMatLoad(_60, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); /* Load from the float array: offset 256u per workgroup index, stride argument 16u. */ |
| 35 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> tempArg = _60; |
| 36 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> A32 = tempArg; |
| 37 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> _79; |
| 38 | + coopMatLoad(_79, ssbo16.data, 512u * gl_WorkGroupID.x, 32u, Layout); /* Same matrix type loaded from the float16_t array; both the offset factor and the stride argument are doubled (512u and 32u). */ |
| 39 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> tempArg_1 = _79; |
| 40 | + A32 = tempArg_1; |
| 41 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> _92; /* The same pair of loads repeated for a use-B matrix. */ |
| 42 | + coopMatLoad(_92, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 43 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> tempArg_2 = _92; |
| 44 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> B32 = tempArg_2; |
| 45 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> _103; |
| 46 | + coopMatLoad(_103, ssbo16.data, 512u * gl_WorkGroupID.x, 32u, Layout); |
| 47 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> tempArg_3 = _103; |
| 48 | + B32 = tempArg_3; |
| 49 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> _116; /* The same pair of loads repeated for an accumulator-use matrix. */ |
| 50 | + coopMatLoad(_116, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 51 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_4 = _116; |
| 52 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> C32 = tempArg_4; |
| 53 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> _127; |
| 54 | + coopMatLoad(_127, ssbo16.data, 512u * gl_WorkGroupID.x, 32u, Layout); |
| 55 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_5 = _127; |
| 56 | + C32 = tempArg_5; |
| 57 | +} |
| 58 | + |
| 59 | +void loads_16() /* Loads float16_t cooperative matrices for each of the three uses (A, B, Accumulator), once from ssbo32.data and once from ssbo16.data. The loaded matrices are only copied into locals and are not read afterwards. */ |
| 60 | +{ |
| 61 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> _141; |
| 62 | + coopMatLoad(_141, ssbo32.data, 128u * gl_WorkGroupID.x, 8u, Layout); /* Load from the float array: offset 128u per workgroup index, stride argument 8u. */ |
| 63 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> tempArg = _141; |
| 64 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> A16 = tempArg; |
| 65 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> _152; |
| 66 | + coopMatLoad(_152, ssbo16.data, 256u * gl_WorkGroupID.x, 16u, Layout); /* Same matrix type loaded from the float16_t array; offset factor 256u and stride argument 16u. */ |
| 67 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA> tempArg_1 = _152; |
| 68 | + A16 = tempArg_1; |
| 69 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> _164; /* The same pair of loads repeated for a use-B matrix. */ |
| 70 | + coopMatLoad(_164, ssbo32.data, 128u * gl_WorkGroupID.x, 8u, Layout); |
| 71 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> tempArg_2 = _164; |
| 72 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> B16 = tempArg_2; |
| 73 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> _175; |
| 74 | + coopMatLoad(_175, ssbo16.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 75 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB> tempArg_3 = _175; |
| 76 | + B16 = tempArg_3; |
| 77 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> _187; /* The same pair of loads repeated for an accumulator-use matrix. */ |
| 78 | + coopMatLoad(_187, ssbo32.data, 128u * gl_WorkGroupID.x, 8u, Layout); |
| 79 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_4 = _187; |
| 80 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> C16 = tempArg_4; |
| 81 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> _198; |
| 82 | + coopMatLoad(_198, ssbo16.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 83 | + coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_5 = _198; |
| 84 | + C16 = tempArg_5; |
| 85 | +} |
| 86 | + |
| 87 | +void stores() /* Stores six constant-filled use-A matrices, one per element type (float, uint, int, float16_t, int16_t, uint16_t), into ssbo32.data. All six calls use identical destination arguments: offset 128u * gl_WorkGroupID.x and a stride argument of 0u. */ |
| 88 | +{ |
| 89 | + coopMatStore(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0), ssbo32.data, 128u * gl_WorkGroupID.x, 0u, Layout); |
| 90 | + coopMatStore(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(100u), ssbo32.data, 128u * gl_WorkGroupID.x, 0u, Layout); /* From here on the matrix element type differs from the float element type of ssbo32.data. */ |
| 91 | + coopMatStore(coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(-100), ssbo32.data, 128u * gl_WorkGroupID.x, 0u, Layout); |
| 92 | + coopMatStore(coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseA>(float16_t(100.0)), ssbo32.data, 128u * gl_WorkGroupID.x, 0u, Layout); |
| 93 | + coopMatStore(coopmat<int16_t, Scope, Rows, Columns, gl_MatrixUseA>(-100s), ssbo32.data, 128u * gl_WorkGroupID.x, 0u, Layout); |
| 94 | + coopMatStore(coopmat<uint16_t, Scope, Rows, Columns, gl_MatrixUseA>(100us), ssbo32.data, 128u * gl_WorkGroupID.x, 0u, Layout); |
| 95 | +} |
| 96 | + |
| 97 | +void len() /* Queries .length() on six coopmat types that differ in element type and use, assigning each result to the local len_1. The value is never read afterwards. */ |
| 98 | +{ |
| 99 | + int len_1 = int(uint(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(0).length())); /* length() is called on a temporary matrix constructed from 0; the result is cast to uint and then to int. */ |
| 100 | + len_1 = int(uint(coopmat<float16_t, Scope, Rows, Columns, gl_MatrixUseB>(0).length())); |
| 101 | + len_1 = int(uint(coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator>(0).length())); |
| 102 | + len_1 = int(uint(coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(0).length())); |
| 103 | + len_1 = int(uint(coopmat<int16_t, Scope, Rows, Columns, gl_MatrixUseB>(0).length())); |
| 104 | + len_1 = int(uint(coopmat<uint16_t, Scope, Rows, Columns, gl_MatrixUseAccumulator>(0).length())); |
| 105 | +} |
| 106 | + |
| 107 | +void conversions() /* Builds an int use-A matrix by constructor conversion, first from a float matrix and then from a uint matrix. The result is not stored anywhere. */ |
| 108 | +{ |
| 109 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> A = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0); |
| 110 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> A2 = coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(100u); |
| 111 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> B = coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(A); /* float matrix converted to int matrix. */ |
| 112 | + B = coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(A2); /* uint matrix converted to int matrix. */ |
| 113 | +} |
| 114 | + |
| 115 | +void elementwise() /* Applies the compound assignments +=, -=, *=, /= (matrix with itself) and *= with a scalar to a float use-A matrix and to an int use-A matrix. The results are not stored anywhere. */ |
| 116 | +{ |
| 117 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> A = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0); |
| 118 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> B = coopmat<int, Scope, Rows, Columns, gl_MatrixUseA>(100); |
| 119 | + A += A; |
| 120 | + A -= A; |
| 121 | + A *= A; |
| 122 | + A /= A; /* NOTE(review): A was subtracted from itself two statements earlier, so assuming per-component semantics this divides zero by zero; the result is unused here. Confirm this is intentional. */ |
| 123 | + A *= 100.0; |
| 124 | + B += B; |
| 125 | + B -= B; |
| 126 | + B *= B; |
| 127 | + B /= B; /* NOTE(review): same pattern as above for the integer matrix, presumably an integer division by zero per component; result unused. Confirm this is intentional. */ |
| 128 | + B *= 100; |
| 129 | +} |
| 130 | + |
| 131 | +void insert_extract() /* Reads and writes individual matrix components by index: adds 50.0 to every component of a float use-A matrix initialised to 100.0, then stores the matrix to ssbo32.data at offset 0u with stride argument 16u. */ |
| 132 | +{ |
| 133 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> A = coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(100.0); |
| 134 | + for (int i = 0; i < int(uint(coopmat<float, Scope, Rows, Columns, gl_MatrixUseA>(0).length())); i++) /* Loop bound is the .length() of a temporary matrix of the same type as A. */ |
| 135 | + { |
| 136 | + A[i] += 50.0; |
| 137 | + } |
| 138 | + coopMatStore(A, ssbo32.data, 0u, 16u, Layout); |
| 139 | +} |
| 140 | + |
| 141 | +void scalar_construct() /* Constructs a uint use-A matrix from the non-constant scalar gl_WorkGroupID.x and stores it to ssbo32.data at offset 0u with stride argument 16u. */ |
| 142 | +{ |
| 143 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> A = coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA>(gl_WorkGroupID.x); |
| 144 | + coopMatStore(A, ssbo32.data, 0u, 16u, Layout); /* The matrix element type is uint while ssbo32.data is declared as a float array. */ |
| 145 | +} |
| 146 | + |
| 147 | +void matmul() /* Loads float A, B and accumulator matrices from ssbo32.data (all three with the same offset and stride arguments) and calls coopMatMulAdd three times, feeding the result back in as the accumulator. The final result is not stored. */ |
| 148 | +{ |
| 149 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> _268; |
| 150 | + coopMatLoad(_268, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 151 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> tempArg = _268; |
| 152 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> A = tempArg; |
| 153 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> _279; |
| 154 | + coopMatLoad(_279, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 155 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> tempArg_1 = _279; |
| 156 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseB> B = tempArg_1; |
| 157 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> _290; |
| 158 | + coopMatLoad(_290, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 159 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_2 = _290; |
| 160 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseAccumulator> C = tempArg_2; |
| 161 | + C = coopMatMulAdd(A, B, C, 0); /* Fourth argument 0: no operand flags set. */ |
| 162 | + C = coopMatMulAdd(A, B, C, 0); |
| 163 | + C = coopMatMulAdd(A, B, C, 16); /* NOTE(review): 16 presumably corresponds to the saturating-accumulation matrix operand flag (0x10); confirm against the extension spec. */ |
| 164 | +} |
| 165 | + |
| 166 | +void matmul_uint() /* Loads uint A, B and accumulator matrices from ssbo32.data (all three with the same offset and stride arguments) and performs one coopMatMulAdd. The result is not stored. */ |
| 167 | +{ |
| 168 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> _315; |
| 169 | + coopMatLoad(_315, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); /* The matrix element type is uint while ssbo32.data is declared as a float array. */ |
| 170 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> tempArg = _315; |
| 171 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseA> A = tempArg; |
| 172 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseB> _328; |
| 173 | + coopMatLoad(_328, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 174 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseB> tempArg_1 = _328; |
| 175 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseB> B = tempArg_1; |
| 176 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseAccumulator> _341; |
| 177 | + coopMatLoad(_341, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 178 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_2 = _341; |
| 179 | + coopmat<uint, Scope, Rows, Columns, gl_MatrixUseAccumulator> C = tempArg_2; |
| 180 | + C = coopMatMulAdd(A, B, C, 31); /* NOTE(review): 31 (0x1F) sets the five lowest operand-flag bits, presumably the signedness flags plus saturating accumulation; confirm against the cooperative matrix operand definitions, in particular for unsigned element types. */ |
| 181 | +} |
| 182 | + |
| 183 | +void matmul_int() /* Loads int A, B and accumulator matrices from ssbo32.data (all three with the same offset and stride arguments) and performs one coopMatMulAdd. The result is not stored. */ |
| 184 | +{ |
| 185 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> _358; |
| 186 | + coopMatLoad(_358, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); /* The matrix element type is int while ssbo32.data is declared as a float array. */ |
| 187 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> tempArg = _358; |
| 188 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseA> A = tempArg; |
| 189 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseB> _371; |
| 190 | + coopMatLoad(_371, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 191 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseB> tempArg_1 = _371; |
| 192 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseB> B = tempArg_1; |
| 193 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator> _384; |
| 194 | + coopMatLoad(_384, ssbo32.data, 256u * gl_WorkGroupID.x, 16u, Layout); |
| 195 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator> tempArg_2 = _384; |
| 196 | + coopmat<int, Scope, Rows, Columns, gl_MatrixUseAccumulator> C = tempArg_2; |
| 197 | + C = coopMatMulAdd(A, B, C, 31); /* NOTE(review): 31 (0x1F) sets the five lowest operand-flag bits, presumably the signedness flags plus saturating accumulation; confirm against the cooperative matrix operand definitions. */ |
| 198 | +} |
| 199 | + |
| 200 | +void lds() /* Round-trips a float use-A matrix through the shared array blah: loads it at offset 0u with stride argument 16u, then stores it back with the same arguments. */ |
| 201 | +{ |
| 202 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> _482; |
| 203 | + coopMatLoad(_482, blah, 0u, 16u, Layout); /* The matrix element type is float while blah is declared as a uint array. */ |
| 204 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> tempArg = _482; |
| 205 | + coopmat<float, Scope, Rows, Columns, gl_MatrixUseA> A = tempArg; |
| 206 | + coopMatStore(A, blah, 0u, 16u, Layout); |
| 207 | +} |
| 208 | + |
| 209 | +void main() /* Shader entry point: calls each of the functions defined above exactly once, in declaration order. */ |
| 210 | +{ |
| 211 | + loads_32(); |
| 212 | + loads_16(); |
| 213 | + stores(); |
| 214 | + len(); |
| 215 | + conversions(); |
| 216 | + elementwise(); |
| 217 | + insert_extract(); |
| 218 | + scalar_construct(); |
| 219 | + matmul(); |
| 220 | + matmul_uint(); |
| 221 | + matmul_int(); |
| 222 | + lds(); |
| 223 | +} |
| 224 | + |
0 commit comments