| #version 450 core | |
| #extension GL_EXT_bfloat16 : require | |
| #extension GL_KHR_cooperative_matrix : enable | |
| #extension GL_KHR_memory_scope_semantics : enable | |
| #extension GL_EXT_shader_explicit_arithmetic_types : enable | |
| #extension GL_EXT_scalar_block_layout : enable | |
| layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; /* Compute workgroup size declared as 1 x 1 x 1. */ | |
| const bfloat16_t bc10 = bfloat16_t(10); /* Scalar bfloat16 constant built from an integer literal. */ | |
| const bf16vec2 b2c20 = bf16vec2(20); /* Two-component bfloat16 vector constant built from a single scalar. */ | |
| layout(constant_id = 1) const bfloat16_t bsc10 = bfloat16_t(10); /* bfloat16 specialization constant, id 1, default value 10. */ | |
| struct S { bfloat16_t b; }; /* Struct with a single bfloat16 member. */ | |
| const S sc = S(bc10); /* Constant struct constructed from the bfloat16 constant above. */ | |
| const bfloat16_t bca[2] = {bc10, bc10}; /* Constant two-element bfloat16 array using a brace initializer list. */ | |
| shared bfloat16_t bfs[10]; /* Shared-qualified array of ten bfloat16 values; not referenced elsewhere in the visible source. */ | |
| layout(scalar) buffer B { /* Scalar-layout buffer block holding bfloat16 vectors of three and two components plus one scalar; accessed through the instance name buf. */ | |
| bf16vec3 b3; | |
| bf16vec2 b2; | |
| bfloat16_t b1; | |
| } buf; | |
| bfloat16_t funcbf16(bfloat16_t x) /* Takes a bfloat16_t by value and returns it unchanged. */ | |
| { | |
| return x; | |
| } | |
| bfloat16_t funcf32(float32_t x) /* Takes a float32_t and returns it converted to bfloat16_t. */ | |
| { | |
| return bfloat16_t(x); /* Explicit constructor conversion from float32_t. */ | |
| } | |
| bfloat16_t funcf64(float64_t x) /* Takes a float64_t and returns it converted to bfloat16_t. */ | |
| { | |
| return bfloat16_t(x); /* Explicit constructor conversion from float64_t. */ | |
| } | |
| void main() /* Entry point: a sequence of statements that each use bfloat16_t in a different way (construction, conversion, buffer access, builtins, cooperative matrices, function calls, bit reinterpretation). Computed values are kept in locals only. */ | |
| { | |
| float f = 2.0; | |
| bfloat16_t b = bfloat16_t(1.0); /* Construction from a floating-point literal. */ | |
| bfloat16_t(f); /* Constructor used as a bare expression statement; the result is discarded. */ | |
| bf16vec2 b2 = bf16vec2(f); /* Vector construction from a single float scalar. */ | |
| uint8_t u8 = uint8_t(5); /* One local of every explicit-width integer and float type, used as conversion operands below. */ | |
| uint16_t u16 = uint16_t(5); | |
| uint32_t u32 = 5; | |
| uint64_t u64 = 5; | |
| int8_t i8 = int8_t(6); | |
| int16_t i16 = int16_t(6); | |
| int32_t i32 = 6; | |
| int64_t i64 = 6; | |
| bfloat16_t bf16 = bfloat16_t(7); | |
| float16_t f16 = float16_t(7); | |
| float32_t f32 = 7; | |
| float64_t f64 = 7; | |
| b = bfloat16_t(u8); /* Explicit conversions TO bfloat16_t from each unsigned, signed and floating-point width in turn. */ | |
| b = bfloat16_t(u16); | |
| b = bfloat16_t(u32); | |
| b = bfloat16_t(u64); | |
| b = bfloat16_t(i8); | |
| b = bfloat16_t(i16); | |
| b = bfloat16_t(i32); | |
| b = bfloat16_t(i64); | |
| b = bfloat16_t(bf16); /* Same-type construction: bfloat16_t from bfloat16_t. */ | |
| b = bfloat16_t(f16); | |
| b = bfloat16_t(f32); | |
| b = bfloat16_t(f64); | |
| u8 = uint8_t(b); /* Explicit conversions FROM bfloat16_t to each unsigned, signed and floating-point width in turn. */ | |
| u16 = uint16_t(b); | |
| u32 = uint32_t(b); | |
| u64 = uint64_t(b); | |
| i8 = int8_t(b); | |
| i16 = int16_t(b); | |
| i32 = int32_t(b); | |
| i64 = int64_t(b); | |
| bf16 = bfloat16_t(b); | |
| f16 = float16_t(b); | |
| f32 = float32_t(b); | |
| f64 = float64_t(b); | |
| f32 = b; /* Assignment with no constructor: relies on implicit conversion from bfloat16_t to float32_t. */ | |
| f64 = b; /* Likewise for float64_t. */ | |
| b = buf.b1; /* Reads of the scalar, two-component and three-component members of the buffer block. */ | |
| b2 = buf.b2; | |
| bf16vec3 b3 = buf.b3; | |
| dot(b2, b2); /* dot() applied to bf16vec2 operands; the result is discarded. */ | |
| coopmat<bfloat16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmA = coopmat<bfloat16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(3.0); /* 16 x 16 subgroup-scope use-A cooperative matrix with bfloat16_t components, constructed from the scalar 3.0. */ | |
| coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA> cmAf = coopmat<float, gl_ScopeSubgroup, 16, 16, gl_MatrixUseA>(cmA); /* Same-shape float cooperative matrix constructed from the bfloat16_t one (component type conversion). */ | |
| funcbf16(b); /* Calls to the helper functions; return values are discarded. */ | |
| funcf32(b); /* Argument is bfloat16_t while the parameter is float32_t, so the call relies on implicit conversion. */ | |
| funcf64(b); /* Argument is bfloat16_t while the parameter is float64_t, so the call relies on implicit conversion. */ | |
| int16_t i16_1 = bfloat16BitsToIntEXT(b); /* bfloat16BitsToIntEXT on scalar and 2-, 3-, 4-component operands, results stored in 16-bit signed integer types of matching size. */ | |
| i16vec2 i16_2 = bfloat16BitsToIntEXT(b2); | |
| i16vec3 i16_3 = bfloat16BitsToIntEXT(bf16vec3(b2, b)); /* The three-component operand is assembled from a bf16vec2 and a scalar. */ | |
| i16vec4 i16_4 = bfloat16BitsToIntEXT(bf16vec4(b2, b2)); /* The four-component operand is assembled from two bf16vec2 values. */ | |
| uint16_t u16_1 = bfloat16BitsToUintEXT(b); /* bfloat16BitsToUintEXT with the same four operand sizes, results stored in 16-bit unsigned integer types. */ | |
| u16vec2 u16_2 = bfloat16BitsToUintEXT(b2); | |
| u16vec3 u16_3 = bfloat16BitsToUintEXT(bf16vec3(b2, b)); | |
| u16vec4 u16_4 = bfloat16BitsToUintEXT(bf16vec4(b2, b2)); | |
| bfloat16_t b16_1 = intBitsToBFloat16EXT(i16_1); /* Reverse direction: intBitsToBFloat16EXT applied to the signed results above. */ | |
| bf16vec2 b16_2 = intBitsToBFloat16EXT(i16_2); | |
| bf16vec3 b16_3 = intBitsToBFloat16EXT(i16_3); | |
| bf16vec4 b16_4 = intBitsToBFloat16EXT(i16_4); | |
| b16_1 = uintBitsToBFloat16EXT(u16_1); /* Reverse direction: uintBitsToBFloat16EXT applied to the unsigned results above. */ | |
| b16_2 = uintBitsToBFloat16EXT(u16_2); | |
| b16_3 = uintBitsToBFloat16EXT(u16_3); | |
| b16_4 = uintBitsToBFloat16EXT(u16_4); | |
| } | |
| bfloat16_t func2(bf16vec4 v[2], int i) /* Takes a two-element array of bf16vec4 and an integer index; uses bfloat16 values with indexing, swizzles, a struct member, a ternary and an if/else. Returns v[0].x when i == 2, otherwise v[1].y. Not called anywhere in the visible source. */ | |
| { | |
| bfloat16_t b; | |
| b = (bf16vec2(v[0]))[i]; /* Builds a bf16vec2 from a bf16vec4, then indexes the temporary with a non-constant index. */ | |
| b = (bf16vec2(v[1])).y; /* Same construction, component selected by swizzle instead of index. */ | |
| v[i].z = v[i][i]; /* Non-constant index used for both the array element and the vector component; the value is stored into component z of the parameter array element. NOTE(review): i == 2 is tested below although v has only two elements, so v[i] would be out of range for that value - confirm this function is only meant to be compiled, not executed. */ | |
| S s; | |
| s.b = b; /* Store into the bfloat16 member of a local struct; s is not read afterwards. */ | |
| b = (i != 0) ? b : bfloat16_t(2); /* Ternary selection between two bfloat16_t operands. */ | |
| if (i == 2) { /* Both branches overwrite b, so the earlier assignments do not affect the returned value. */ | |
| b = v[0].x; | |
| } else { | |
| b = v[1].y; | |
| } | |
| return b; | |
| } | |