// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_DEF %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa -ffreestanding \
// RUN: -cl-std=CL2.0 -fvisibility=hidden | FileCheck --check-prefixes=AMDGCN,AMDGCN_CL_20 %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple=spirv64-unknown-unknown -ffreestanding \
// RUN: -fvisibility=hidden | FileCheck --check-prefix=SPIRV %s

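// Three RUN configurations: AMDGCN in the default language mode, AMDGCN with
// -cl-std=CL2.0 (which changes how the private alloca for the result is
// addressed), and SPIRV64, whose syncscope names differ (e.g. "device" rather
// than "agent", with __MEMORY_SCOPE_CLUSTR mapping to "workgroup").
//
// fi1a: __scoped_atomic_load with an explicit result pointer, once per memory
// scope, all at __ATOMIC_RELAXED (monotonic).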
// AMDGCN_CL_DEF-LABEL: define hidden i32 @fi1a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
// AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP1]], ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP5]], ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP9]], ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: store i32 [[TMP11]], ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: ret i32 [[TMP12]]
//
// AMDGCN_CL_20-LABEL: define hidden i32 @fi1a(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
// AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
// AMDGCN_CL_20-NEXT: store i32 [[TMP1]], ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("agent") monotonic, align 4
// AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("cluster") monotonic, align 4
// AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
// AMDGCN_CL_20-NEXT: store i32 [[TMP9]], ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// AMDGCN_CL_20-NEXT: store i32 [[TMP11]], ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: ret i32 [[TMP12]]
//
// SPIRV-LABEL: define hidden spir_func i32 @fi1a(
// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4
// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP1]], ptr [[V]], align 4
// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP3:%.*]] = load atomic i32, ptr [[TMP2]] syncscope("device") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP3]], ptr [[V]], align 4
// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP5]], ptr [[V]], align 4
// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP7]], ptr [[V]], align 4
// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP9]], ptr [[V]], align 4
// SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP11:%.*]] = load atomic i32, ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP11]], ptr [[V]], align 4
// SPIRV-NEXT: [[TMP12:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: ret i32 [[TMP12]]
//
int fi1a(int *i) {
  int v;
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
  __scoped_atomic_load(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
  return v;
}

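// fi1b: __scoped_atomic_load_n; each result is stored back through 'i', so
// every scope produces an atomic load followed by a plain store. The two
// AMDGCN modes agree here, so the shared AMDGCN prefix is used.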
// AMDGCN-LABEL: define hidden i32 @fi1b(
// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0:[0-9]+]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP1]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP3]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr
// AMDGCN-NEXT: [[ATOMIC_TEMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP5]] to ptr
// AMDGCN-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
// AMDGCN-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("agent") monotonic, align 4
// AMDGCN-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP1_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP1_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("workgroup") monotonic, align 4
// AMDGCN-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 [[TMP10]], ptr [[TMP11]], align 4
// AMDGCN-NEXT: [[TMP12:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP12]] syncscope("cluster") monotonic, align 4
// AMDGCN-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP3_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP14:%.*]] = load i32, ptr [[ATOMIC_TEMP3_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP15:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 [[TMP14]], ptr [[TMP15]], align 4
// AMDGCN-NEXT: [[TMP16:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP17:%.*]] = load atomic i32, ptr [[TMP16]] syncscope("wavefront") monotonic, align 4
// AMDGCN-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP19:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4
// AMDGCN-NEXT: [[TMP20:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP21:%.*]] = load atomic i32, ptr [[TMP20]] syncscope("singlethread") monotonic, align 4
// AMDGCN-NEXT: store i32 [[TMP21]], ptr [[ATOMIC_TEMP5_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP5_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP23:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4
// AMDGCN-NEXT: [[TMP24:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// AMDGCN-NEXT: ret i32 [[TMP25]]
//
// SPIRV-LABEL: define hidden spir_func i32 @fi1b(
// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
// SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[ATOMIC_TEMP5:%.*]] = alloca i32, align 4
// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP1:%.*]] = load atomic i32, ptr [[TMP0]] monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP1]], ptr [[ATOMIC_TEMP]], align 4
// SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4
// SPIRV-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP5:%.*]] = load atomic i32, ptr [[TMP4]] syncscope("device") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP5]], ptr [[ATOMIC_TEMP1]], align 4
// SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[ATOMIC_TEMP1]], align 4
// SPIRV-NEXT: [[TMP7:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP6]], ptr [[TMP7]], align 4
// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP9:%.*]] = load atomic i32, ptr [[TMP8]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP9]], ptr [[ATOMIC_TEMP2]], align 4
// SPIRV-NEXT: [[TMP10:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4
// SPIRV-NEXT: [[TMP11:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP10]], ptr [[TMP11]], align 4
// SPIRV-NEXT: [[TMP12:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP13:%.*]] = load atomic i32, ptr [[TMP12]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP13]], ptr [[ATOMIC_TEMP3]], align 4
// SPIRV-NEXT: [[TMP14:%.*]] = load i32, ptr [[ATOMIC_TEMP3]], align 4
// SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP14]], ptr [[TMP15]], align 4
// SPIRV-NEXT: [[TMP16:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP17:%.*]] = load atomic i32, ptr [[TMP16]] syncscope("subgroup") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP4]], align 4
// SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4
// SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4
// SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP21:%.*]] = load atomic i32, ptr [[TMP20]] syncscope("singlethread") monotonic, align 4
// SPIRV-NEXT: store i32 [[TMP21]], ptr [[ATOMIC_TEMP5]], align 4
// SPIRV-NEXT: [[TMP22:%.*]] = load i32, ptr [[ATOMIC_TEMP5]], align 4
// SPIRV-NEXT: [[TMP23:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 [[TMP22]], ptr [[TMP23]], align 4
// SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
// SPIRV-NEXT: ret i32 [[TMP25]]
//
int fi1b(int *i) {
  *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
  *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
  *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
  *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
  *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
  *i = __scoped_atomic_load_n(i, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
  return *i;
}

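// fi2a: __scoped_atomic_store of a local value; each scope lowers to a
// monotonic atomic store with the matching syncscope.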
// AMDGCN_CL_DEF-LABEL: define hidden void @fi2a(
// AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]]
// AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_DEF-NEXT: [[V_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4
// AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("agent") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("cluster") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[V_ASCAST]], align 4
// AMDGCN_CL_DEF-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// AMDGCN_CL_DEF-NEXT: ret void
//
// AMDGCN_CL_20-LABEL: define hidden void @fi2a(
// AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN_CL_20-NEXT: [[ENTRY:.*:]]
// AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN_CL_20-NEXT: [[V:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[V]], align 4
// AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
// AMDGCN_CL_20-NEXT: store atomic i32 [[TMP2]], ptr [[TMP0]] monotonic, align 4
// AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
// AMDGCN_CL_20-NEXT: store atomic i32 [[TMP5]], ptr [[TMP3]] syncscope("agent") monotonic, align 4
// AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
// AMDGCN_CL_20-NEXT: store atomic i32 [[TMP8]], ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
// AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
// AMDGCN_CL_20-NEXT: store atomic i32 [[TMP11]], ptr [[TMP9]] syncscope("cluster") monotonic, align 4
// AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
// AMDGCN_CL_20-NEXT: store atomic i32 [[TMP14]], ptr [[TMP12]] syncscope("wavefront") monotonic, align 4
// AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(5) [[V]] to ptr
// AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
// AMDGCN_CL_20-NEXT: store atomic i32 [[TMP17]], ptr [[TMP15]] syncscope("singlethread") monotonic, align 4
// AMDGCN_CL_20-NEXT: ret void
//
// SPIRV-LABEL: define hidden spir_func void @fi2a(
// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
// SPIRV-NEXT: [[V:%.*]] = alloca i32, align 4
// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[V]], align 4
// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4
// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("device") monotonic, align 4
// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP7:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP9:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
// SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[V]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// SPIRV-NEXT: ret void
//
void fi2a(int *i) {
  int v = 1;
  __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
  __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
  __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
  __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
  __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
  __scoped_atomic_store(i, &v, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}

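// fi2b: __scoped_atomic_store_n with an immediate value; the constant is
// staged through a private .atomictmp slot and then stored atomically per
// scope.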
// AMDGCN-LABEL: define hidden void @fi2b(
// AMDGCN-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// AMDGCN-NEXT: [[ENTRY:.*:]]
// AMDGCN-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// AMDGCN-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5)
// AMDGCN-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr
// AMDGCN-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr
// AMDGCN-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr
// AMDGCN-NEXT: [[DOTATOMICTMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP2]] to ptr
// AMDGCN-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr
// AMDGCN-NEXT: [[DOTATOMICTMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP4]] to ptr
// AMDGCN-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr
// AMDGCN-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4
// AMDGCN-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4
// AMDGCN-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4
// AMDGCN-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("agent") monotonic, align 4
// AMDGCN-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP2_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP2_ASCAST]], align 4
// AMDGCN-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// AMDGCN-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4
// AMDGCN-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("cluster") monotonic, align 4
// AMDGCN-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP4_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTATOMICTMP4_ASCAST]], align 4
// AMDGCN-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("wavefront") monotonic, align 4
// AMDGCN-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8
// AMDGCN-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4
// AMDGCN-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4
// AMDGCN-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// AMDGCN-NEXT: ret void
//
// SPIRV-LABEL: define hidden spir_func void @fi2b(
// SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] {
// SPIRV-NEXT: [[ENTRY:.*:]]
// SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8
// SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[DOTATOMICTMP2:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[DOTATOMICTMP4:%.*]] = alloca i32, align 4
// SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4
// SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8
// SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4
// SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP1]], ptr [[TMP0]] monotonic, align 4
// SPIRV-NEXT: [[TMP2:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4
// SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP3]], ptr [[TMP2]] syncscope("device") monotonic, align 4
// SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP2]], align 4
// SPIRV-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTATOMICTMP2]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP5]], ptr [[TMP4]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: [[TMP6:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4
// SPIRV-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP7]], ptr [[TMP6]] syncscope("workgroup") monotonic, align 4
// SPIRV-NEXT: [[TMP8:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP4]], align 4
// SPIRV-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTATOMICTMP4]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP9]], ptr [[TMP8]] syncscope("subgroup") monotonic, align 4
// SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[I_ADDR]], align 8
// SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4
// SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4
// SPIRV-NEXT: store atomic i32 [[TMP11]], ptr [[TMP10]] syncscope("singlethread") monotonic, align 4
// SPIRV-NEXT: ret void
//
void fi2b(int *i) {
  __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM);
  __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE);
  __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP);
  __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR);
  __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT);
  __scoped_atomic_store_n(i, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE);
}

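// fi3a: scoped fetch-and-op builtins, starting with __scoped_atomic_fetch_add
// at system scope; the checks cover atomicrmw add, sub, and, or, xor, nand,
// min, and max. On AMDGCN each atomicrmw carries the
// !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory annotations.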
| // AMDGCN_CL_DEF-LABEL: define hidden void @fi3a( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret void |
| // |
| // AMDGCN_CL_20-LABEL: define hidden void @fi3a( |
| // AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4:![0-9]+]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_20-NEXT: ret void |
| // |
| // SPIRV-LABEL: define hidden spir_func void @fi3a( |
| // SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // SPIRV-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // SPIRV-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // SPIRV-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // SPIRV-NEXT: ret void |
| // |
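| // fi3a: each __scoped_atomic_fetch_* call below uses __MEMORY_SCOPE_SYSTEM with |
| // relaxed ordering. As the checks above show, system scope is the default: the |
| // SPIR-V lowering emits a plain 'monotonic' atomicrmw with no syncscope. |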
| void fi3a(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { |
| *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *c = __scoped_atomic_fetch_and(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *d = __scoped_atomic_fetch_or(d, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *e = __scoped_atomic_fetch_xor(e, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden void @fi3b( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret void |
| // |
| // AMDGCN_CL_20-LABEL: define hidden void @fi3b( |
| // AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_20-NEXT: ret void |
| // |
| // SPIRV-LABEL: define hidden spir_func void @fi3b( |
| // SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // SPIRV-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // SPIRV-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // SPIRV-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // SPIRV-NEXT: ret void |
| // |
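| // fi3b: the same eight operations at __MEMORY_SCOPE_DEVICE. Device scope maps to |
| // syncscope("agent") on AMDGCN and syncscope("device") on SPIR-V; the AMDGCN |
| // lowering additionally tags each atomicrmw with !amdgpu.no.fine.grained.memory |
| // and !amdgpu.no.remote.memory metadata. |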
| void fi3b(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { |
| *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *c = __scoped_atomic_fetch_and(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *d = __scoped_atomic_fetch_or(d, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *e = __scoped_atomic_fetch_xor(e, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| } |
| |
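| // fi3c (checks below): the same operations at workgroup scope, which lower to |
| // syncscope("workgroup") atomicrmw instructions on AMDGCN. |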
| // AMDGCN_CL_DEF-LABEL: define hidden void @fi3c( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret void |
| // |
| // AMDGCN_CL_20-LABEL: define hidden void @fi3c( |
| // AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_20-NEXT: ret void |
| // |
| // SPIRV-LABEL: define hidden spir_func void @fi3c( |
| // SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // SPIRV-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // SPIRV-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // SPIRV-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // SPIRV-NEXT: ret void |
| // |
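| // fi3c exercises each __scoped_atomic_fetch_* builtin at workgroup scope with |
| // relaxed ordering; every target lowers these to atomicrmw operations carrying |
| // syncscope("workgroup") monotonic, and the AMDGCN targets additionally attach |
| // the !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory metadata. |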
| void fi3c(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { |
| *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *c = __scoped_atomic_fetch_and(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *d = __scoped_atomic_fetch_or(d, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *e = __scoped_atomic_fetch_xor(e, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden void @fi3_clustr( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret void |
| // |
| // AMDGCN_CL_20-LABEL: define hidden void @fi3_clustr( |
| // AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_20-NEXT: ret void |
| // |
| // SPIRV-LABEL: define hidden spir_func void @fi3_clustr( |
| // SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // SPIRV-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // SPIRV-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // SPIRV-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // SPIRV-NEXT: ret void |
| // |
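| // fi3_clustr repeats the same pattern with __MEMORY_SCOPE_CLUSTR: the AMDGCN |
| // targets lower the cluster scope to syncscope("cluster"), while the SPIR-V |
| // target emits syncscope("workgroup") instead. |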
| void fi3_clustr(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { |
| *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *c = __scoped_atomic_fetch_and(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *d = __scoped_atomic_fetch_or(d, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *e = __scoped_atomic_fetch_xor(e, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| } |
| |
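| // fi3d covers the wavefront scope; on the AMDGCN targets the builtins lower |
| // to atomicrmw operations carrying syncscope("wavefront") monotonic. |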
| // AMDGCN_CL_DEF-LABEL: define hidden void @fi3d( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret void |
| // |
| // AMDGCN_CL_20-LABEL: define hidden void @fi3d( |
| // AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_20-NEXT: ret void |
| // |
| // SPIRV-LABEL: define hidden spir_func void @fi3d( |
| // SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // SPIRV-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // SPIRV-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // SPIRV-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // SPIRV-NEXT: ret void |
| // |
| void fi3d(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { |
| *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *c = __scoped_atomic_fetch_and(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *d = __scoped_atomic_fetch_or(d, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *e = __scoped_atomic_fetch_xor(e, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| } |
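| // Note (summary of the checks above): __MEMORY_SCOPE_WVFRNT is expected to |
| // lower these fetch-ops to syncscope("wavefront") atomicrmw on AMDGCN and to |
| // syncscope("subgroup") on SPIR-V; the AMDGCN runs additionally attach the |
| // !amdgpu.no.fine.grained.memory and !amdgpu.no.remote.memory metadata. |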
| |
| // AMDGCN_CL_DEF-LABEL: define hidden void @fi3e( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret void |
| // |
| // AMDGCN_CL_20-LABEL: define hidden void @fi3e( |
| // AMDGCN_CL_20-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[E_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[E_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[F_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[G_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[G_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[H_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[H_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP1]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP2]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP3_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP3]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP4]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP5_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP5]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP6_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP6]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP7_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP7]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP8_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP8]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP9_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP9]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP10_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP10]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP11_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP11]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP12_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP12]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP13_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP13]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP14_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP14]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[A]], ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[B]], ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[E]], ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[F]], ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[G]], ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[H]], ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // AMDGCN_CL_20-NEXT: ret void |
| // |
| // SPIRV-LABEL: define hidden spir_func void @fi3e( |
| // SPIRV-SAME: ptr noundef [[A:%.*]], ptr noundef [[B:%.*]], ptr noundef [[C:%.*]], ptr noundef [[D:%.*]], ptr noundef [[E:%.*]], ptr noundef [[F:%.*]], ptr noundef [[G:%.*]], ptr noundef [[H:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[F_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[H_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP1:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP3:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP4:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP5:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP6:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP7:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP8:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP9:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP10:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP11:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP12:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP13:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[ATOMIC_TEMP14:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[F]], ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[H]], ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP1]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP2]], ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[TMP4]], align 4 |
| // SPIRV-NEXT: [[TMP5:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTATOMICTMP1]], align 4 |
| // SPIRV-NEXT: [[TMP7:%.*]] = atomicrmw sub ptr [[TMP5]], i32 [[TMP6]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP7]], ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP8:%.*]] = load i32, ptr [[ATOMIC_TEMP2]], align 4 |
| // SPIRV-NEXT: [[TMP9:%.*]] = load ptr, ptr [[B_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP8]], ptr [[TMP9]], align 4 |
| // SPIRV-NEXT: [[TMP10:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTATOMICTMP3]], align 4 |
| // SPIRV-NEXT: [[TMP12:%.*]] = atomicrmw and ptr [[TMP10]], i32 [[TMP11]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP12]], ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP13:%.*]] = load i32, ptr [[ATOMIC_TEMP4]], align 4 |
| // SPIRV-NEXT: [[TMP14:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP13]], ptr [[TMP14]], align 4 |
| // SPIRV-NEXT: [[TMP15:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTATOMICTMP5]], align 4 |
| // SPIRV-NEXT: [[TMP17:%.*]] = atomicrmw or ptr [[TMP15]], i32 [[TMP16]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP17]], ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP18:%.*]] = load i32, ptr [[ATOMIC_TEMP6]], align 4 |
| // SPIRV-NEXT: [[TMP19:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP18]], ptr [[TMP19]], align 4 |
| // SPIRV-NEXT: [[TMP20:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTATOMICTMP7]], align 4 |
| // SPIRV-NEXT: [[TMP22:%.*]] = atomicrmw xor ptr [[TMP20]], i32 [[TMP21]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP22]], ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP23:%.*]] = load i32, ptr [[ATOMIC_TEMP8]], align 4 |
| // SPIRV-NEXT: [[TMP24:%.*]] = load ptr, ptr [[E_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4 |
| // SPIRV-NEXT: [[TMP25:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTATOMICTMP9]], align 4 |
| // SPIRV-NEXT: [[TMP27:%.*]] = atomicrmw nand ptr [[TMP25]], i32 [[TMP26]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP27]], ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP28:%.*]] = load i32, ptr [[ATOMIC_TEMP10]], align 4 |
| // SPIRV-NEXT: [[TMP29:%.*]] = load ptr, ptr [[F_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP28]], ptr [[TMP29]], align 4 |
| // SPIRV-NEXT: [[TMP30:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTATOMICTMP11]], align 4 |
| // SPIRV-NEXT: [[TMP32:%.*]] = atomicrmw min ptr [[TMP30]], i32 [[TMP31]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP32]], ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP33:%.*]] = load i32, ptr [[ATOMIC_TEMP12]], align 4 |
| // SPIRV-NEXT: [[TMP34:%.*]] = load ptr, ptr [[G_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP33]], ptr [[TMP34]], align 4 |
| // SPIRV-NEXT: [[TMP35:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTATOMICTMP13]], align 4 |
| // SPIRV-NEXT: [[TMP37:%.*]] = atomicrmw max ptr [[TMP35]], i32 [[TMP36]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP37]], ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP38:%.*]] = load i32, ptr [[ATOMIC_TEMP14]], align 4 |
| // SPIRV-NEXT: [[TMP39:%.*]] = load ptr, ptr [[H_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 [[TMP38]], ptr [[TMP39]], align 4 |
| // SPIRV-NEXT: ret void |
| // |
| void fi3e(int *a, int *b, int *c, int *d, int *e, int *f, int *g, int *h) { |
| *a = __scoped_atomic_fetch_add(a, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *b = __scoped_atomic_fetch_sub(b, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *c = __scoped_atomic_fetch_and(c, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *d = __scoped_atomic_fetch_or(d, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *e = __scoped_atomic_fetch_xor(e, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *f = __scoped_atomic_fetch_nand(f, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *g = __scoped_atomic_fetch_min(g, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| *h = __scoped_atomic_fetch_max(h, 1, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| } |
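| // Note (summary of the checks above): __MEMORY_SCOPE_SINGLE is expected to |
| // lower to syncscope("singlethread") on both the AMDGCN and SPIR-V targets. |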
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4a( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4a( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[DESIRED]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4a( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DESIRED:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: store i32 1, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
| _Bool fi4a(int *i) { |
| int cmp = 0; |
| int desired = 1; |
| return __scoped_atomic_compare_exchange(i, &cmp, &desired, 0, |
| __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_SYSTEM); |
| } |
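| // Note (summary of the checks above): with __MEMORY_SCOPE_SYSTEM the cmpxchg |
| // carries no syncscope on either target, i.e. it keeps the default |
| // system-wide scope. |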
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4b( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("agent") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4b( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[DESIRED]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("agent") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4b( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DESIRED:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: store i32 1, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("device") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
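| // fi4b: strong compare-exchange at device scope; per the checks above, this |
| // lowers to syncscope("agent") on AMDGCN and syncscope("device") on SPIR-V. |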
| _Bool fi4b(int *i) { |
| int cmp = 0; |
| int desired = 1; |
| return __scoped_atomic_compare_exchange(i, &cmp, &desired, 0, |
| __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_DEVICE); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4c( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4c( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[DESIRED]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("workgroup") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4c( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DESIRED:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: store i32 1, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
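| // fi4c: workgroup scope lowers to syncscope("workgroup") on both AMDGCN and |
| // SPIR-V. |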
| _Bool fi4c(int *i) { |
| int cmp = 0; |
| int desired = 1; |
| return __scoped_atomic_compare_exchange(i, &cmp, &desired, 0, |
| __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_WRKGRP); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4_clustr( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("cluster") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4_clustr( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[DESIRED]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("cluster") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4_clustr( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DESIRED:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: store i32 1, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
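| // fi4_clustr: cluster scope lowers to syncscope("cluster") on AMDGCN; on |
| // SPIR-V it lowers to syncscope("workgroup"), per the checks above. |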
| _Bool fi4_clustr(int *i) { |
| int cmp = 0; |
| int desired = 1; |
| return __scoped_atomic_compare_exchange(i, &cmp, &desired, 0, |
| __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_CLUSTR); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4d( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("wavefront") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4d( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[DESIRED]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("wavefront") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4d( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DESIRED:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: store i32 1, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("subgroup") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
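| // fi4d: wavefront scope lowers to syncscope("wavefront") on AMDGCN and |
| // syncscope("subgroup") on SPIR-V. |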
| _Bool fi4d(int *i) { |
| int cmp = 0; |
| int desired = 1; |
| return __scoped_atomic_compare_exchange(i, &cmp, &desired, 0, |
| __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_WVFRNT); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi4e( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DESIRED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("singlethread") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi4e( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DESIRED:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr addrspace(5) [[DESIRED]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(5) [[DESIRED]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP2]], i32 [[TMP3]] syncscope("singlethread") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP4]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = extractvalue { i32, i1 } [[TMP4]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP6]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP5]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP6]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP7:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP7]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi4e( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DESIRED:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: store i32 1, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DESIRED]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("singlethread") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
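| // fi4e: single-thread scope lowers to syncscope("singlethread") on both |
| // targets. |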
| _Bool fi4e(int *i) { |
| int cmp = 0; |
| int desired = 1; |
| return __scoped_atomic_compare_exchange(i, &cmp, &desired, 0, |
| __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_SINGLE); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5a( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5a( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5a( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
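| // fi5a: __scoped_atomic_compare_exchange_n with the weak flag set; lowers to |
| // "cmpxchg weak" with no explicit syncscope at system scope. |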
| _Bool fi5a(int *i) { |
| int cmp = 0; |
| return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, |
| __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_SYSTEM); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5b( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("agent") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5b( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("agent") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5b( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("device") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
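| // fi5b: weak compare-exchange at device scope: syncscope("agent") on AMDGCN, |
| // syncscope("device") on SPIR-V. |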
| _Bool fi5b(int *i) { |
| int cmp = 0; |
| return __scoped_atomic_compare_exchange_n(i, &cmp, 1, 1, __ATOMIC_ACQUIRE, |
| __ATOMIC_ACQUIRE, |
| __MEMORY_SCOPE_DEVICE); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5c( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5c( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5c( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
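| // fi5c: weak compare-exchange at workgroup scope on both targets. |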
| _Bool fi5c(int *i) { |
| int cmp = 0; |
| return __scoped_atomic_compare_exchange_n( |
| i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WRKGRP); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5_clustr( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("cluster") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5_clustr( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("cluster") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5_clustr( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("workgroup") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
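| // fi5_clustr: weak compare-exchange, acquire/acquire, at cluster scope. __MEMORY_SCOPE_CLUSTR |
| // lowers to syncscope("cluster") on AMDGCN, while the SPIR-V lowering falls back to syncscope("workgroup"). |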
| _Bool fi5_clustr(int *i) { |
| int cmp = 0; |
| return __scoped_atomic_compare_exchange_n( |
| i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_CLUSTR); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5d( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("wavefront") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5d( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("wavefront") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5d( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("subgroup") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
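| // fi5d: weak compare-exchange, acquire/acquire, at wavefront scope. __MEMORY_SCOPE_WVFRNT |
| // lowers to syncscope("wavefront") on AMDGCN and syncscope("subgroup") on SPIR-V. |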
| _Bool fi5d(int *i) { |
| int cmp = 0; |
| return __scoped_atomic_compare_exchange_n( |
| i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_WVFRNT); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi5e( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 0, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("singlethread") acquire acquire, align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_DEF-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_DEF-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP4]], ptr [[CMP_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_DEF: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_DEF-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_DEF-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi5e( |
| // AMDGCN_CL_20-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[CMPXCHG_BOOL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CMPXCHG_BOOL]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[I]], ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 0, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i32 1, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP_ASCAST]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("singlethread") acquire acquire, align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // AMDGCN_CL_20-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // AMDGCN_CL_20: [[CMPXCHG_STORE_EXPECTED]]: |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr addrspace(5) [[CMP]], align 4 |
| // AMDGCN_CL_20-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // AMDGCN_CL_20: [[CMPXCHG_CONTINUE]]: |
| // AMDGCN_CL_20-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // AMDGCN_CL_20-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi5e( |
| // SPIRV-SAME: ptr noundef [[I:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[CMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: [[CMPXCHG_BOOL:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 0, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8 |
| // SPIRV-NEXT: store i32 1, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i32, ptr [[CMP]], align 4 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTATOMICTMP]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = cmpxchg weak ptr [[TMP0]], i32 [[TMP1]], i32 [[TMP2]] syncscope("singlethread") acquire acquire, align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 |
| // SPIRV-NEXT: [[TMP5:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 |
| // SPIRV-NEXT: br i1 [[TMP5]], label %[[CMPXCHG_CONTINUE:.*]], label %[[CMPXCHG_STORE_EXPECTED:.*]] |
| // SPIRV: [[CMPXCHG_STORE_EXPECTED]]: |
| // SPIRV-NEXT: store i32 [[TMP4]], ptr [[CMP]], align 4 |
| // SPIRV-NEXT: br label %[[CMPXCHG_CONTINUE]] |
| // SPIRV: [[CMPXCHG_CONTINUE]]: |
| // SPIRV-NEXT: [[STOREDV:%.*]] = zext i1 [[TMP5]] to i8 |
| // SPIRV-NEXT: store i8 [[STOREDV]], ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[TMP6:%.*]] = load i8, ptr [[CMPXCHG_BOOL]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP6]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
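| // fi5e: weak compare-exchange, acquire/acquire, at single-thread scope. __MEMORY_SCOPE_SINGLE |
| // lowers to syncscope("singlethread") on both AMDGCN and SPIR-V. |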
| _Bool fi5e(int *i) { |
| int cmp = 0; |
| return __scoped_atomic_compare_exchange_n( |
| i, &cmp, 1, 1, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE, __MEMORY_SCOPE_SINGLE); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6a( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret i32 [[TMP4]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden i32 @fi6a( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP3]] monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[RET]], align 4 |
| // AMDGCN_CL_20-NEXT: ret i32 [[TMP5]] |
| // |
| // SPIRV-LABEL: define hidden spir_func i32 @fi6a( |
| // SPIRV-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[RET:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[RET]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET]], align 4 |
| // SPIRV-NEXT: ret i32 [[TMP4]] |
| // |
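| // fi6a: relaxed exchange through the generic __scoped_atomic_exchange form at system scope; |
| // no syncscope is emitted on either target, leaving the default system-wide scope. |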
| int fi6a(int *c, int *d) { |
| int ret; |
| __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); |
| return ret; |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6b( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret i32 [[TMP4]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden i32 @fi6b( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP3]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[RET]], align 4 |
| // AMDGCN_CL_20-NEXT: ret i32 [[TMP5]] |
| // |
| // SPIRV-LABEL: define hidden spir_func i32 @fi6b( |
| // SPIRV-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[RET:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("device") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[RET]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET]], align 4 |
| // SPIRV-NEXT: ret i32 [[TMP4]] |
| // |
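| // fi6b: relaxed exchange at device scope. __MEMORY_SCOPE_DEVICE lowers to syncscope("agent") |
| // on AMDGCN and syncscope("device") on SPIR-V. |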
| int fi6b(int *c, int *d) { |
| int ret; |
| __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_DEVICE); |
| return ret; |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6c( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret i32 [[TMP4]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden i32 @fi6c( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP3]] syncscope("workgroup") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[RET]], align 4 |
| // AMDGCN_CL_20-NEXT: ret i32 [[TMP5]] |
| // |
| // SPIRV-LABEL: define hidden spir_func i32 @fi6c( |
| // SPIRV-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[RET:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[RET]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET]], align 4 |
| // SPIRV-NEXT: ret i32 [[TMP4]] |
| // |
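| // fi6c: relaxed exchange at workgroup scope; both targets emit syncscope("workgroup"). |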
| int fi6c(int *c, int *d) { |
| int ret; |
| __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WRKGRP); |
| return ret; |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6_clustr( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret i32 [[TMP4]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden i32 @fi6_clustr( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP3]] syncscope("cluster") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[RET]], align 4 |
| // AMDGCN_CL_20-NEXT: ret i32 [[TMP5]] |
| // |
| // SPIRV-LABEL: define hidden spir_func i32 @fi6_clustr( |
| // SPIRV-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[RET:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("workgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[RET]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET]], align 4 |
| // SPIRV-NEXT: ret i32 [[TMP4]] |
| // |
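| // fi6_clustr: relaxed exchange at cluster scope; AMDGCN emits syncscope("cluster"), |
| // while SPIR-V again falls back to syncscope("workgroup"). |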
| int fi6_clustr(int *c, int *d) { |
| int ret; |
| __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_CLUSTR); |
| return ret; |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6d( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret i32 [[TMP4]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden i32 @fi6d( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP3]] syncscope("wavefront") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[RET]], align 4 |
| // AMDGCN_CL_20-NEXT: ret i32 [[TMP5]] |
| // |
| // SPIRV-LABEL: define hidden spir_func i32 @fi6d( |
| // SPIRV-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[RET:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("subgroup") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[RET]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET]], align 4 |
| // SPIRV-NEXT: ret i32 [[TMP4]] |
| // |
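| // fi6d: relaxed exchange at wavefront scope; syncscope("wavefront") on AMDGCN, |
| // syncscope("subgroup") on SPIR-V. |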
| int fi6d(int *c, int *d) { |
| int ret; |
| __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_WVFRNT); |
| return ret; |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden i32 @fi6e( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[RET_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i32 [[TMP3]], ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET_ASCAST]], align 4 |
| // AMDGCN_CL_DEF-NEXT: ret i32 [[TMP4]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden i32 @fi6e( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RET:%.*]] = alloca i32, align 4, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store ptr [[D]], ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[RET]] to ptr |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP4:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP3]] syncscope("singlethread") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i32 [[TMP4]], ptr [[TMP2]], align 4 |
| // AMDGCN_CL_20-NEXT: [[TMP5:%.*]] = load i32, ptr addrspace(5) [[RET]], align 4 |
| // AMDGCN_CL_20-NEXT: ret i32 [[TMP5]] |
| // |
| // SPIRV-LABEL: define hidden spir_func i32 @fi6e( |
| // SPIRV-SAME: ptr noundef [[C:%.*]], ptr noundef [[D:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[RET:%.*]] = alloca i32, align 4 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 |
| // SPIRV-NEXT: [[TMP3:%.*]] = atomicrmw xchg ptr [[TMP0]], i32 [[TMP2]] syncscope("singlethread") monotonic, align 4 |
| // SPIRV-NEXT: store i32 [[TMP3]], ptr [[RET]], align 4 |
| // SPIRV-NEXT: [[TMP4:%.*]] = load i32, ptr [[RET]], align 4 |
| // SPIRV-NEXT: ret i32 [[TMP4]] |
| // |
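| // fi6e: relaxed exchange at single-thread scope; syncscope("singlethread") on both targets. |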
| int fi6e(int *c, int *d) { |
| int ret; |
| __scoped_atomic_exchange(c, d, &ret, __ATOMIC_RELAXED, __MEMORY_SCOPE_SINGLE); |
| return ret; |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7a( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] monotonic, align 1, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7a( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] monotonic, align 1, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7a( |
| // SPIRV-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i8 1, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] monotonic, align 1 |
| // SPIRV-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
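| // fi7a: relaxed exchange of a _Bool via __scoped_atomic_exchange_n at system scope; the bool |
| // is widened to an i8 atomicrmw xchg with the default scope, then truncated back to i1. |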
| _Bool fi7a(_Bool *c) { |
| return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, |
| __MEMORY_SCOPE_SYSTEM); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7b( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("agent") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7b( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("agent") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7b( |
| // SPIRV-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i8 1, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("device") monotonic, align 1 |
| // SPIRV-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
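| // fi7b: _Bool exchange at device scope; the i8 xchg carries syncscope("agent") on AMDGCN |
| // and syncscope("device") on SPIR-V. |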
| _Bool fi7b(_Bool *c) { |
| return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, |
| __MEMORY_SCOPE_DEVICE); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7c( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("workgroup") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7c( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("workgroup") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7c( |
| // SPIRV-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i8 1, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("workgroup") monotonic, align 1 |
| // SPIRV-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
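| // fi7c: _Bool exchange at workgroup scope; both targets emit syncscope("workgroup"). |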
| _Bool fi7c(_Bool *c) { |
| return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, |
| __MEMORY_SCOPE_WRKGRP); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7_clustr( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("cluster") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7_clustr( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("cluster") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7_clustr( |
| // SPIRV-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i8 1, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("workgroup") monotonic, align 1 |
| // SPIRV-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
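| // Cluster scope is kept as syncscope("cluster") on AMDGCN; SPIR-V defines no |
| // cluster memory scope, so it is widened to syncscope("workgroup") there. |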
| _Bool fi7_clustr(_Bool *c) { |
| return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, |
| __MEMORY_SCOPE_CLUSTR); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7d( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("wavefront") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7d( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("wavefront") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7d( |
| // SPIRV-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i8 1, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("subgroup") monotonic, align 1 |
| // SPIRV-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
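| // Wavefront scope lowers to syncscope("wavefront") on AMDGCN and to the |
| // corresponding syncscope("subgroup") on SPIR-V. |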
| _Bool fi7d(_Bool *c) { |
| return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, |
| __MEMORY_SCOPE_WVFRNT); |
| } |
| |
| // AMDGCN_CL_DEF-LABEL: define hidden zeroext i1 @fi7e( |
| // AMDGCN_CL_DEF-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_DEF-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_DEF-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_DEF-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_DEF-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("singlethread") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]] |
| // AMDGCN_CL_DEF-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_DEF-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_DEF-NEXT: ret i1 [[LOADEDV]] |
| // |
| // AMDGCN_CL_20-LABEL: define hidden zeroext i1 @fi7e( |
| // AMDGCN_CL_20-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // AMDGCN_CL_20-NEXT: [[ENTRY:.*:]] |
| // AMDGCN_CL_20-NEXT: [[RETVAL:%.*]] = alloca i1, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1, addrspace(5) |
| // AMDGCN_CL_20-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr |
| // AMDGCN_CL_20-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr |
| // AMDGCN_CL_20-NEXT: [[DOTATOMICTMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTATOMICTMP]] to ptr |
| // AMDGCN_CL_20-NEXT: [[ATOMIC_TEMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ATOMIC_TEMP]] to ptr |
| // AMDGCN_CL_20-NEXT: store ptr [[C]], ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8 |
| // AMDGCN_CL_20-NEXT: store i8 1, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("singlethread") monotonic, align 1, !amdgpu.no.fine.grained.memory [[META4]], !amdgpu.no.remote.memory [[META4]] |
| // AMDGCN_CL_20-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP_ASCAST]], align 1 |
| // AMDGCN_CL_20-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // AMDGCN_CL_20-NEXT: ret i1 [[LOADEDV]] |
| // |
| // SPIRV-LABEL: define hidden spir_func zeroext i1 @fi7e( |
| // SPIRV-SAME: ptr noundef [[C:%.*]]) #[[ATTR0]] { |
| // SPIRV-NEXT: [[ENTRY:.*:]] |
| // SPIRV-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 |
| // SPIRV-NEXT: [[DOTATOMICTMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i8, align 1 |
| // SPIRV-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 |
| // SPIRV-NEXT: store i8 1, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP1:%.*]] = load i8, ptr [[DOTATOMICTMP]], align 1 |
| // SPIRV-NEXT: [[TMP2:%.*]] = atomicrmw xchg ptr [[TMP0]], i8 [[TMP1]] syncscope("singlethread") monotonic, align 1 |
| // SPIRV-NEXT: store i8 [[TMP2]], ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[TMP3:%.*]] = load i8, ptr [[ATOMIC_TEMP]], align 1 |
| // SPIRV-NEXT: [[LOADEDV:%.*]] = trunc i8 [[TMP3]] to i1 |
| // SPIRV-NEXT: ret i1 [[LOADEDV]] |
| // |
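| // Single-thread scope lowers to syncscope("singlethread") on both targets. |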
| _Bool fi7e(_Bool *c) { |
| return __scoped_atomic_exchange_n(c, 1, __ATOMIC_RELAXED, |
| __MEMORY_SCOPE_SINGLE); |
| } |
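| // The [[META3]]/[[META4]] nodes referenced by the AMDGCN metadata checks are |
| // the empty tuples defined in the autogenerated trailer below. |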
| //. |
| // AMDGCN_CL_DEF: [[META3]] = !{} |
| //. |
| // AMDGCN_CL_20: [[META4]] = !{} |
| //. |