@@ -16,7 +16,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offset_rtn(<2 x half> %val
 ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY1]], %subreg.sub3
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0)
@@ -36,7 +36,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_offen_rtn(<2 x half> %val,
 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -56,7 +56,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_idxen_rtn(<2 x half> %val,
 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:sgpr_32 = COPY $sgpr0
 ; GFX90A_GFX942-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
@@ -78,7 +78,7 @@ define amdgpu_ps <2 x half> @buffer_atomic_fadd_v2f16_bothen_rtn(<2 x half> %val
 ; GFX90A_GFX942-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY3]], %subreg.sub3
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE1]], killed [[REG_SEQUENCE]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
@@ -103,7 +103,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offset_rtn(<2 x half>
 ; GFX90A_GFX942-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY9]], %subreg.sub0, killed [[COPY8]], %subreg.sub1, killed [[COPY7]], %subreg.sub2, killed [[COPY6]], %subreg.sub3
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN [[COPY5]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFSET_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
@@ -129,7 +129,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_offen_rtn(<2 x half> %
 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
 ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_OFFEN_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
@@ -155,7 +155,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_idxen_rtn(<2 x half> %
 ; GFX90A_GFX942-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub1
 ; GFX90A_GFX942-NEXT: [[COPY10:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY10]], %subreg.sub0, killed [[COPY9]], %subreg.sub1, killed [[COPY8]], %subreg.sub2, killed [[COPY7]], %subreg.sub3
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN [[COPY6]], [[COPY1]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_IDXEN_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
@@ -183,7 +183,7 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half>
 ; GFX90A_GFX942-NEXT: [[COPY11:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE1]].sub0
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[COPY11]], %subreg.sub0, killed [[COPY10]], %subreg.sub1, killed [[COPY9]], %subreg.sub2, killed [[COPY8]], %subreg.sub3
 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY1]], %subreg.sub1
-; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:vgpr_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
+; GFX90A_GFX942-NEXT: [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN:%[0-9]+]]:av_32 = BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN [[COPY7]], killed [[REG_SEQUENCE3]], killed [[REG_SEQUENCE2]], [[COPY]], 0, 3, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8)
 ; GFX90A_GFX942-NEXT: $vgpr0 = COPY [[BUFFER_ATOMIC_PK_ADD_F16_BOTHEN_RTN]]
 ; GFX90A_GFX942-NEXT: SI_RETURN_TO_EPILOG $vgpr0
 %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)