-
Notifications
You must be signed in to change notification settings - Fork 15.5k
AMDGPU/GlobalISel: Regbanklegalize rules for G_UNMERGE_VALUES #171653
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -137,34 +137,20 @@ define amdgpu_ps void @fpext_f16_to_f64_div(half %a, ptr addrspace(1) %ptr) { | |
| } | ||
|
|
||
| define amdgpu_ps <2 x float> @fpext_v2f16_to_v2f32_uniform(<2 x half> inreg %a) { | ||
| ; GFX11-FAKE16-LABEL: fpext_v2f16_to_v2f32_uniform: | ||
| ; GFX11-FAKE16: ; %bb.0: | ||
| ; GFX11-FAKE16-NEXT: s_lshr_b32 s1, s0, 16 | ||
| ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v0, s0 | ||
| ; GFX11-FAKE16-NEXT: v_cvt_f32_f16_e32 v1, s1 | ||
| ; GFX11-FAKE16-NEXT: ; return to shader part epilog | ||
| ; | ||
| ; GFX11-TRUE16-LABEL: fpext_v2f16_to_v2f32_uniform: | ||
| ; GFX11-TRUE16: ; %bb.0: | ||
| ; GFX11-TRUE16-NEXT: v_cvt_f32_f16_e32 v0, s0 | ||
|
Collaborator
Author
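There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was a bug: the old GFX11-TRUE16 output converted only the low half (s0) and then copied that result into v1, instead of converting the high half for the second element. |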
||
| ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | ||
| ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v0 | ||
| ; GFX11-TRUE16-NEXT: ; return to shader part epilog | ||
| ; | ||
| ; GFX12-FAKE16-LABEL: fpext_v2f16_to_v2f32_uniform: | ||
| ; GFX12-FAKE16: ; %bb.0: | ||
| ; GFX12-FAKE16-NEXT: s_cvt_f32_f16 s1, s0 | ||
| ; GFX12-FAKE16-NEXT: s_cvt_hi_f32_f16 s0, s0 | ||
| ; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | ||
| ; GFX12-FAKE16-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 | ||
| ; GFX12-FAKE16-NEXT: ; return to shader part epilog | ||
| ; GFX11-LABEL: fpext_v2f16_to_v2f32_uniform: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: s_lshr_b32 s1, s0, 16 | ||
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v0, s0 | ||
| ; GFX11-NEXT: v_cvt_f32_f16_e32 v1, s1 | ||
| ; GFX11-NEXT: ; return to shader part epilog | ||
| ; | ||
| ; GFX12-TRUE16-LABEL: fpext_v2f16_to_v2f32_uniform: | ||
| ; GFX12-TRUE16: ; %bb.0: | ||
| ; GFX12-TRUE16-NEXT: s_cvt_f32_f16 s0, s0 | ||
| ; GFX12-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | ||
| ; GFX12-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s0 | ||
| ; GFX12-TRUE16-NEXT: ; return to shader part epilog | ||
| ; GFX12-LABEL: fpext_v2f16_to_v2f32_uniform: | ||
| ; GFX12: ; %bb.0: | ||
| ; GFX12-NEXT: s_cvt_f32_f16 s1, s0 | ||
| ; GFX12-NEXT: s_cvt_hi_f32_f16 s0, s0 | ||
| ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | ||
| ; GFX12-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0 | ||
| ; GFX12-NEXT: ; return to shader part epilog | ||
| %result = fpext <2 x half> %a to <2 x float> | ||
| ret <2 x float> %result | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,36 @@ | ||
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
| ; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For testing purpose of the crash, try -global-isel or |
||
|
|
||
| define amdgpu_ps void @unmerge_sgprS16_from_V2S16(ptr addrspace(1) inreg %ptr, ptr addrspace(1) inreg %out) { | ||
| ; GFX11-LABEL: unmerge_sgprS16_from_V2S16: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x0 | ||
| ; GFX11-NEXT: v_mov_b32_e32 v1, 0 | ||
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) | ||
| ; GFX11-NEXT: s_pack_hl_b32_b16 s0, s0, s0 | ||
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | ||
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 | ||
| ; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] | ||
| ; GFX11-NEXT: s_endpgm | ||
| %load = load <2 x i16>, ptr addrspace(1) %ptr | ||
| %shuffle = shufflevector <2 x i16> %load, <2 x i16> poison, <2 x i32> <i32 1, i32 0> | ||
| store <2 x i16> %shuffle, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
|
|
||
| define amdgpu_ps void @unmerge_sgprS16_from_V4S16(ptr addrspace(1) inreg %ptr, ptr addrspace(1) inreg %out) { | ||
| ; GFX11-LABEL: unmerge_sgprS16_from_V4S16: | ||
| ; GFX11: ; %bb.0: | ||
| ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 | ||
| ; GFX11-NEXT: v_mov_b32_e32 v1, 0 | ||
| ; GFX11-NEXT: s_waitcnt lgkmcnt(0) | ||
| ; GFX11-NEXT: s_pack_lh_b32_b16 s0, s0, s1 | ||
| ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | ||
| ; GFX11-NEXT: v_mov_b32_e32 v0, s0 | ||
| ; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] | ||
| ; GFX11-NEXT: s_endpgm | ||
| %load = load <4 x i16>, ptr addrspace(1) %ptr | ||
| %shuffle = shufflevector <4 x i16> %load, <4 x i16> poison, <2 x i32> <i32 0, i32 3> | ||
| store <2 x i16> %shuffle, ptr addrspace(1) %out | ||
| ret void | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need tests for DivS16 and DivBRC
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are many in the existing files; this change just moves the rules to AMDGPURegBankLegalizeRules.cpp, and only the sgpr s16 handling is changed. Sgpr s16 also appears in some existing tests, where it was fine to just apply sgpr to all operands because the G_UNMERGE_VALUES was combined away. I added tests for the case where an sgpr s16 G_UNMERGE_VALUES is not combined away and actually has to be instruction-selected; unlike any of the existing tests, the tests in unmerge-sgpr-s16.ll are cases that crash the compiler.