@@ -8,37 +8,34 @@ define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspac
88; GFX10-LABEL: v_mul_i64_no_zext:
99; GFX10: ; %bb.0:
1010; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
11- ; GFX10-NEXT: v_lshlrev_b32_e32 v7 , 3, v0
11+ ; GFX10-NEXT: v_lshlrev_b32_e32 v6 , 3, v0
1212; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1313; GFX10-NEXT: s_clause 0x1
14- ; GFX10-NEXT: global_load_dwordx2 v[0:1 ], v7 , s[0:1]
15- ; GFX10-NEXT: global_load_dwordx2 v[2:3 ], v7 , s[2:3]
14+ ; GFX10-NEXT: global_load_dwordx2 v[2:3 ], v6 , s[0:1]
15+ ; GFX10-NEXT: global_load_dwordx2 v[4:5 ], v6 , s[2:3]
1616; GFX10-NEXT: s_waitcnt vmcnt(0)
17- ; GFX10-NEXT: v_mad_u64_u32 v[4:5], s0, v0, v2, 0
18- ; GFX10-NEXT: v_mad_u64_u32 v[5:6], s0, v0, v3, v[5:6]
19- ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v1, v2, v[5:6]
20- ; GFX10-NEXT: v_mov_b32_e32 v5, v0
21- ; GFX10-NEXT: global_store_dwordx2 v7, v[4:5], s[2:3]
17+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v2, v4, 0
18+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, v2, v5, v[1:2]
19+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s0, v3, v4, v[1:2]
20+ ; GFX10-NEXT: global_store_dwordx2 v6, v[0:1], s[2:3]
2221; GFX10-NEXT: s_endpgm
2322;
2423; GFX11-LABEL: v_mul_i64_no_zext:
2524; GFX11: ; %bb.0:
2625; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c
2726; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
2827; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
29- ; GFX11-NEXT: v_lshlrev_b32_e32 v9 , 3, v0
28+ ; GFX11-NEXT: v_lshlrev_b32_e32 v8 , 3, v0
3029; GFX11-NEXT: s_waitcnt lgkmcnt(0)
3130; GFX11-NEXT: s_clause 0x1
32- ; GFX11-NEXT: global_load_b64 v[0:1 ], v9 , s[0:1]
33- ; GFX11-NEXT: global_load_b64 v[2:3 ], v9 , s[2:3]
31+ ; GFX11-NEXT: global_load_b64 v[2:3 ], v8 , s[0:1]
32+ ; GFX11-NEXT: global_load_b64 v[4:5 ], v8 , s[2:3]
3433; GFX11-NEXT: s_waitcnt vmcnt(0)
35- ; GFX11-NEXT: v_mad_u64_u32 v[4:5 ], null, v0, v2 , 0
34+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1 ], null, v2, v4 , 0
3635; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
37- ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v0, v3, v[5:6]
38- ; GFX11-NEXT: v_mad_u64_u32 v[7:8], null, v1, v2, v[6:7]
39- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
40- ; GFX11-NEXT: v_mov_b32_e32 v5, v7
41- ; GFX11-NEXT: global_store_b64 v9, v[4:5], s[2:3]
36+ ; GFX11-NEXT: v_mad_u64_u32 v[6:7], null, v2, v5, v[1:2]
37+ ; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v3, v4, v[6:7]
38+ ; GFX11-NEXT: global_store_b64 v8, v[0:1], s[2:3]
4239; GFX11-NEXT: s_endpgm
4340 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
4441 %gep.a = getelementptr inbounds i64 , ptr addrspace (1 ) %aptr , i32 %tid
@@ -58,18 +55,16 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
5855; GFX10-NEXT: s_clause 0x1
5956; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
6057; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
61- ; GFX10-NEXT: v_lshlrev_b32_e32 v2 , 3, v0
62- ; GFX10-NEXT: v_lshlrev_b32_e32 v3 , 2, v0
58+ ; GFX10-NEXT: v_lshlrev_b32_e32 v1 , 3, v0
59+ ; GFX10-NEXT: v_lshlrev_b32_e32 v0 , 2, v0
6360; GFX10-NEXT: s_waitcnt lgkmcnt(0)
64- ; GFX10-NEXT: global_load_dwordx2 v[0:1 ], v2 , s[2:3]
65- ; GFX10-NEXT: global_load_dword v4, v3 , s[6:7]
61+ ; GFX10-NEXT: global_load_dwordx2 v[2:3 ], v1 , s[2:3]
62+ ; GFX10-NEXT: global_load_dword v4, v0 , s[6:7]
6663; GFX10-NEXT: s_waitcnt vmcnt(0)
67- ; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v0, v4, 0
68- ; GFX10-NEXT: v_mov_b32_e32 v0, v3
69- ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v1, v4, v[0:1]
70- ; GFX10-NEXT: v_mov_b32_e32 v3, v0
71- ; GFX10-NEXT: v_mov_b32_e32 v0, 0
72- ; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1]
64+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v2, v4, 0
65+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, v3, v4, v[1:2]
66+ ; GFX10-NEXT: v_mov_b32_e32 v2, 0
67+ ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
7368; GFX10-NEXT: s_endpgm
7469;
7570; GFX11-LABEL: v_mul_i64_zext_src1:
@@ -80,17 +75,17 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
8075; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
8176; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
8277; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v0
83- ; GFX11-NEXT: v_lshlrev_b32_e32 v2 , 2, v0
78+ ; GFX11-NEXT: v_lshlrev_b32_e32 v0 , 2, v0
8479; GFX11-NEXT: s_waitcnt lgkmcnt(0)
85- ; GFX11-NEXT: global_load_b64 v[0:1 ], v1, s[2:3]
86- ; GFX11-NEXT: global_load_b32 v5, v2 , s[4:5]
80+ ; GFX11-NEXT: global_load_b64 v[2:3 ], v1, s[2:3]
81+ ; GFX11-NEXT: global_load_b32 v5, v0 , s[4:5]
8782; GFX11-NEXT: s_waitcnt vmcnt(0)
88- ; GFX11-NEXT: v_mad_u64_u32 v[2:3 ], null, v0 , v5, 0
83+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1 ], null, v2 , v5, 0
8984; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
90- ; GFX11-NEXT: v_mov_b32_e32 v0, v3
91- ; GFX11-NEXT: v_mad_u64_u32 v[3:4 ], null, v1 , v5, v[0:1 ]
92- ; GFX11-NEXT: v_mov_b32_e32 v0 , 0
93- ; GFX11-NEXT: global_store_b64 v0 , v[2:3 ], s[0:1]
85+ ; GFX11-NEXT: v_mov_b32_e32 v4, v1
86+ ; GFX11-NEXT: v_mad_u64_u32 v[1:2 ], null, v3 , v5, v[4:5 ]
87+ ; GFX11-NEXT: v_mov_b32_e32 v2 , 0
88+ ; GFX11-NEXT: global_store_b64 v2 , v[0:1 ], s[0:1]
9489; GFX11-NEXT: s_endpgm
9590 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
9691 %gep.a = getelementptr inbounds i64 , ptr addrspace (1 ) %aptr , i32 %tid
@@ -110,18 +105,16 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
110105; GFX10-NEXT: s_clause 0x1
111106; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
112107; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
113- ; GFX10-NEXT: v_lshlrev_b32_e32 v2 , 2, v0
114- ; GFX10-NEXT: v_lshlrev_b32_e32 v3 , 3, v0
108+ ; GFX10-NEXT: v_lshlrev_b32_e32 v1 , 2, v0
109+ ; GFX10-NEXT: v_lshlrev_b32_e32 v0 , 3, v0
115110; GFX10-NEXT: s_waitcnt lgkmcnt(0)
116- ; GFX10-NEXT: global_load_dword v4, v2 , s[2:3]
117- ; GFX10-NEXT: global_load_dwordx2 v[0:1 ], v3 , s[6:7]
111+ ; GFX10-NEXT: global_load_dword v4, v1 , s[2:3]
112+ ; GFX10-NEXT: global_load_dwordx2 v[2:3 ], v0 , s[6:7]
118113; GFX10-NEXT: s_waitcnt vmcnt(0)
119- ; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v4, v0, 0
120- ; GFX10-NEXT: v_mov_b32_e32 v0, v3
121- ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v4, v1, v[0:1]
122- ; GFX10-NEXT: v_mov_b32_e32 v3, v0
123- ; GFX10-NEXT: v_mov_b32_e32 v0, 0
124- ; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1]
114+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v4, v2, 0
115+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, v4, v3, v[1:2]
116+ ; GFX10-NEXT: v_mov_b32_e32 v2, 0
117+ ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
125118; GFX10-NEXT: s_endpgm
126119;
127120; GFX11-LABEL: v_mul_i64_zext_src0:
@@ -135,14 +128,14 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
135128; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
136129; GFX11-NEXT: s_waitcnt lgkmcnt(0)
137130; GFX11-NEXT: global_load_b32 v5, v1, s[2:3]
138- ; GFX11-NEXT: global_load_b64 v[0:1 ], v0, s[4:5]
131+ ; GFX11-NEXT: global_load_b64 v[2:3 ], v0, s[4:5]
139132; GFX11-NEXT: s_waitcnt vmcnt(0)
140- ; GFX11-NEXT: v_mad_u64_u32 v[2:3 ], null, v5, v0 , 0
133+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1 ], null, v5, v2 , 0
141134; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
142- ; GFX11-NEXT: v_mov_b32_e32 v0, v3
143- ; GFX11-NEXT: v_mad_u64_u32 v[3:4 ], null, v5, v1 , v[0:1 ]
144- ; GFX11-NEXT: v_mov_b32_e32 v0 , 0
145- ; GFX11-NEXT: global_store_b64 v0 , v[2:3 ], s[0:1]
135+ ; GFX11-NEXT: v_mov_b32_e32 v4, v1
136+ ; GFX11-NEXT: v_mad_u64_u32 v[1:2 ], null, v5, v3 , v[4:5 ]
137+ ; GFX11-NEXT: v_mov_b32_e32 v2 , 0
138+ ; GFX11-NEXT: global_store_b64 v2 , v[0:1 ], s[0:1]
146139; GFX11-NEXT: s_endpgm
147140 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
148141 %gep.a = getelementptr inbounds i32 , ptr addrspace (1 ) %aptr , i32 %tid
@@ -209,18 +202,16 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
209202; GFX10-NEXT: s_clause 0x1
210203; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
211204; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
212- ; GFX10-NEXT: v_lshlrev_b32_e32 v2 , 3, v0
205+ ; GFX10-NEXT: v_lshlrev_b32_e32 v0 , 3, v0
213206; GFX10-NEXT: s_waitcnt lgkmcnt(0)
214207; GFX10-NEXT: s_clause 0x1
215- ; GFX10-NEXT: global_load_dword v4, v2 , s[2:3]
216- ; GFX10-NEXT: global_load_dwordx2 v[0:1 ], v2 , s[6:7]
208+ ; GFX10-NEXT: global_load_dword v4, v0 , s[2:3]
209+ ; GFX10-NEXT: global_load_dwordx2 v[2:3 ], v0 , s[6:7]
217210; GFX10-NEXT: s_waitcnt vmcnt(0)
218- ; GFX10-NEXT: v_mad_u64_u32 v[2:3], s2, v4, v0, 0
219- ; GFX10-NEXT: v_mov_b32_e32 v0, v3
220- ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v4, v1, v[0:1]
221- ; GFX10-NEXT: v_mov_b32_e32 v3, v0
222- ; GFX10-NEXT: v_mov_b32_e32 v0, 0
223- ; GFX10-NEXT: global_store_dwordx2 v0, v[2:3], s[0:1]
211+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v4, v2, 0
212+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, v4, v3, v[1:2]
213+ ; GFX10-NEXT: v_mov_b32_e32 v2, 0
214+ ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
224215; GFX10-NEXT: s_endpgm
225216;
226217; GFX11-LABEL: v_mul_i64_masked_src0_hi:
@@ -234,14 +225,14 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
234225; GFX11-NEXT: s_waitcnt lgkmcnt(0)
235226; GFX11-NEXT: s_clause 0x1
236227; GFX11-NEXT: global_load_b32 v5, v0, s[2:3]
237- ; GFX11-NEXT: global_load_b64 v[0:1 ], v0, s[4:5]
228+ ; GFX11-NEXT: global_load_b64 v[2:3 ], v0, s[4:5]
238229; GFX11-NEXT: s_waitcnt vmcnt(0)
239- ; GFX11-NEXT: v_mad_u64_u32 v[2:3 ], null, v5, v0 , 0
230+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1 ], null, v5, v2 , 0
240231; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
241- ; GFX11-NEXT: v_mov_b32_e32 v0, v3
242- ; GFX11-NEXT: v_mad_u64_u32 v[3:4 ], null, v5, v1 , v[0:1 ]
243- ; GFX11-NEXT: v_mov_b32_e32 v0 , 0
244- ; GFX11-NEXT: global_store_b64 v0 , v[2:3 ], s[0:1]
232+ ; GFX11-NEXT: v_mov_b32_e32 v4, v1
233+ ; GFX11-NEXT: v_mad_u64_u32 v[1:2 ], null, v5, v3 , v[4:5 ]
234+ ; GFX11-NEXT: v_mov_b32_e32 v2 , 0
235+ ; GFX11-NEXT: global_store_b64 v2 , v[0:1 ], s[0:1]
245236; GFX11-NEXT: s_endpgm
246237 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
247238 %gep.a = getelementptr inbounds i64 , ptr addrspace (1 ) %aptr , i32 %tid
@@ -389,22 +380,20 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
389380; GFX10-NEXT: s_clause 0x1
390381; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
391382; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
392- ; GFX10-NEXT: v_lshlrev_b32_e32 v4 , 3, v0
383+ ; GFX10-NEXT: v_lshlrev_b32_e32 v0 , 3, v0
393384; GFX10-NEXT: s_waitcnt lgkmcnt(0)
394385; GFX10-NEXT: s_clause 0x1
395- ; GFX10-NEXT: global_load_dwordx2 v[0:1 ], v4 , s[2:3]
396- ; GFX10-NEXT: global_load_dwordx2 v[2:3 ], v4 , s[6:7]
386+ ; GFX10-NEXT: global_load_dwordx2 v[1:2 ], v0 , s[2:3]
387+ ; GFX10-NEXT: global_load_dwordx2 v[3:4 ], v0 , s[6:7]
397388; GFX10-NEXT: s_waitcnt vmcnt(1)
398- ; GFX10-NEXT: v_and_b32_e32 v6 , 0xfff00000, v0
389+ ; GFX10-NEXT: v_and_b32_e32 v5 , 0xfff00000, v1
399390; GFX10-NEXT: s_waitcnt vmcnt(0)
400- ; GFX10-NEXT: v_mad_u64_u32 v[4:5], s2, v6, v2, 0
401- ; GFX10-NEXT: v_mov_b32_e32 v0, v5
402- ; GFX10-NEXT: v_mad_u64_u32 v[5:6], s2, v6, v3, v[0:1]
403- ; GFX10-NEXT: v_and_b32_e32 v0, 0xf00f, v1
404- ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v0, v2, v[5:6]
405- ; GFX10-NEXT: v_mov_b32_e32 v5, v0
406- ; GFX10-NEXT: v_mov_b32_e32 v0, 0
407- ; GFX10-NEXT: global_store_dwordx2 v0, v[4:5], s[0:1]
391+ ; GFX10-NEXT: v_mad_u64_u32 v[0:1], s2, v5, v3, 0
392+ ; GFX10-NEXT: v_mad_u64_u32 v[4:5], s2, v5, v4, v[1:2]
393+ ; GFX10-NEXT: v_and_b32_e32 v1, 0xf00f, v2
394+ ; GFX10-NEXT: v_mad_u64_u32 v[1:2], s2, v1, v3, v[4:5]
395+ ; GFX10-NEXT: v_mov_b32_e32 v2, 0
396+ ; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
408397; GFX10-NEXT: s_endpgm
409398;
410399; GFX11-LABEL: v_mul_i64_partially_masked_src0:
@@ -414,24 +403,22 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
414403; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
415404; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
416405; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
417- ; GFX11-NEXT: v_lshlrev_b32_e32 v2 , 3, v0
406+ ; GFX11-NEXT: v_lshlrev_b32_e32 v0 , 3, v0
418407; GFX11-NEXT: s_waitcnt lgkmcnt(0)
419408; GFX11-NEXT: s_clause 0x1
420- ; GFX11-NEXT: global_load_b64 v[0:1 ], v2 , s[2:3]
421- ; GFX11-NEXT: global_load_b64 v[2:3 ], v2 , s[4:5]
409+ ; GFX11-NEXT: global_load_b64 v[1:2 ], v0 , s[2:3]
410+ ; GFX11-NEXT: global_load_b64 v[3:4 ], v0 , s[4:5]
422411; GFX11-NEXT: s_waitcnt vmcnt(1)
423- ; GFX11-NEXT: v_and_b32_e32 v7, 0xfff00000, v0
412+ ; GFX11-NEXT: v_and_b32_e32 v7, 0xfff00000, v1
424413; GFX11-NEXT: s_waitcnt vmcnt(0)
425414; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
426- ; GFX11-NEXT: v_mad_u64_u32 v[4:5], null, v7, v2, 0
427- ; GFX11-NEXT: v_mov_b32_e32 v0, v5
428- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
429- ; GFX11-NEXT: v_mad_u64_u32 v[5:6], null, v7, v3, v[0:1]
430- ; GFX11-NEXT: v_and_b32_e32 v3, 0xf00f, v1
431- ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v2, v[5:6]
415+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v7, v3, 0
416+ ; GFX11-NEXT: v_mad_u64_u32 v[5:6], null, v7, v4, v[1:2]
417+ ; GFX11-NEXT: v_and_b32_e32 v4, 0xf00f, v2
432418; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
433- ; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v0, 0
434- ; GFX11-NEXT: global_store_b64 v0, v[4:5], s[0:1]
419+ ; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v4, v3, v[5:6]
420+ ; GFX11-NEXT: v_mov_b32_e32 v2, 0
421+ ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
435422; GFX11-NEXT: s_endpgm
436423 %tid = call i32 @llvm.amdgcn.workitem.id.x ()
437424 %gep.a = getelementptr inbounds i64 , ptr addrspace (1 ) %aptr , i32 %tid
@@ -536,28 +523,28 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
536523; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
537524; GFX11-NEXT: s_waitcnt lgkmcnt(0)
538525; GFX11-NEXT: s_clause 0x1
539- ; GFX11-NEXT: global_load_b64 v[2:3 ], v0, s[2:3]
540- ; GFX11-NEXT: global_load_b64 v[4:5 ], v0, s[4:5]
526+ ; GFX11-NEXT: global_load_b64 v[3:4 ], v0, s[2:3]
527+ ; GFX11-NEXT: global_load_b64 v[5:6 ], v0, s[4:5]
541528; GFX11-NEXT: s_mov_b32 s2, exec_lo
542529; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1
543530; GFX11-NEXT: s_waitcnt vmcnt(1)
544- ; GFX11-NEXT: v_cmpx_ge_u64_e32 0, v[2:3 ]
531+ ; GFX11-NEXT: v_cmpx_ge_u64_e32 0, v[3:4 ]
545532; GFX11-NEXT: s_xor_b32 s2, exec_lo, s2
546533; GFX11-NEXT: s_cbranch_execz .LBB10_2
547534; GFX11-NEXT: ; %bb.1: ; %else
548535; GFX11-NEXT: s_waitcnt vmcnt(0)
549- ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v2, v4 , 0
536+ ; GFX11-NEXT: v_mad_u64_u32 v[0:1], null, v3, v5 , 0
550537; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
551- ; GFX11-NEXT: v_mad_u64_u32 v[3:4], null, v2, v5, v[1:2]
552- ; GFX11-NEXT: ; implicit-def: $vgpr4_vgpr5
553- ; GFX11-NEXT: v_mov_b32_e32 v1, v3
554- ; GFX11-NEXT: ; implicit-def: $vgpr2_vgpr3
538+ ; GFX11-NEXT: v_mov_b32_e32 v4, v1
539+ ; GFX11-NEXT: v_mad_u64_u32 v[1:2], null, v3, v6, v[4:5]
540+ ; GFX11-NEXT: ; implicit-def: $vgpr3_vgpr4
541+ ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6
555542; GFX11-NEXT: .LBB10_2: ; %Flow
556543; GFX11-NEXT: s_and_not1_saveexec_b32 s2, s2
557544; GFX11-NEXT: s_cbranch_execz .LBB10_4
558545; GFX11-NEXT: ; %bb.3: ; %if
559546; GFX11-NEXT: s_waitcnt vmcnt(0)
560- ; GFX11-NEXT: v_mul_lo_u32 v1, v2, v5
547+ ; GFX11-NEXT: v_mul_lo_u32 v1, v3, v6
561548; GFX11-NEXT: v_mov_b32_e32 v0, 0
562549; GFX11-NEXT: .LBB10_4: ; %endif
563550; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s2
0 commit comments