-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[AMDGPU][GlobalISel] Add RegBankLegalize support for G_FSUB #171244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
+232
−2
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu
Author: None (vangthao95)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/171244.diff
4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 9de309279a247..f7a54adbade45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -934,7 +934,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
bool hasSALUFloat = ST->hasSALUFloatInsts();
- addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
+ addRulesForGOpcs(
+ {G_FADD, G_FSUB, G_FMUL, G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL},
+ Standard)
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll
new file mode 100644
index 0000000000000..ab3699ddc8b1b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
+
+define amdgpu_ps half @fsub_s16_uniform(half inreg %a, half inreg %b) {
+; GFX11-FAKE16-LABEL: fsub_s16_uniform:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_sub_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fsub_s16_uniform:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_sub_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_s16_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_sub_f16 s0, s0, s1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fsub = fsub half %a, %b
+ ret half %fsub
+}
+
+define amdgpu_ps half @fsub_s16_div(half %a, half %b) {
+; GFX11-FAKE16-LABEL: fsub_s16_div:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: v_sub_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fsub_s16_div:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: fsub_s16_div:
+; GFX12-FAKE16: ; %bb.0:
+; GFX12-FAKE16-NEXT: v_sub_f16_e32 v0, v0, v1
+; GFX12-FAKE16-NEXT: ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: fsub_s16_div:
+; GFX12-TRUE16: ; %bb.0:
+; GFX12-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT: ; return to shader part epilog
+ %fsub = fsub half %a, %b
+ ret half %fsub
+}
+
+define amdgpu_ps float @fsub_s32_uniform(float inreg %a, float inreg %b) {
+; GFX11-LABEL: fsub_s32_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_sub_f32_e64 v0, s0, s1
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_s32_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_sub_f32 s0, s0, s1
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fsub = fsub float %a, %b
+ ret float %fsub
+}
+
+define amdgpu_ps float @fsub_s32_div(float %a, float %b) {
+; GCN-LABEL: fsub_s32_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
+; GCN-NEXT: ; return to shader part epilog
+ %fsub = fsub float %a, %b
+ ret float %fsub
+}
+
+define amdgpu_ps void @fsub_s64_uniform(double inreg %a, double inreg %b, ptr addrspace(1) %ptr) {
+; GFX11-LABEL: fsub_s64_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[2:3], s[0:1], -s[2:3]
+; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fsub_s64_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_add_f64_e64 v[2:3], s[0:1], -s[2:3]
+; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT: s_endpgm
+ %fsub = fsub double %a, %b
+ store double %fsub, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_ps void @fsub_s64_div(double %a, double %b, ptr addrspace(1) %ptr) {
+; GFX11-LABEL: fsub_s64_div:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT: s_endpgm
+;
+; GFX12-LABEL: fsub_s64_div:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: v_add_f64_e64 v[0:1], v[0:1], -v[2:3]
+; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT: s_endpgm
+ %fsub = fsub double %a, %b
+ store double %fsub, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_ps <2 x half> @fsub_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX11-LABEL: fsub_v2s16_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_pk_add_f16 v0, s0, s1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_v2s16_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_lshr_b32 s2, s1, 16
+; GFX12-NEXT: s_xor_b32 s1, s1, 0x8000
+; GFX12-NEXT: s_xor_b32 s2, s2, 0x8000
+; GFX12-NEXT: s_lshr_b32 s3, s0, 16
+; GFX12-NEXT: s_add_f16 s0, s0, s1
+; GFX12-NEXT: s_add_f16 s1, s3, s2
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1
+; GFX12-NEXT: v_mov_b32_e32 v0, s0
+; GFX12-NEXT: ; return to shader part epilog
+ %fsub = fsub <2 x half> %a, %b
+ ret <2 x half> %fsub
+}
+
+define amdgpu_ps <2 x half> @fsub_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GCN-LABEL: fsub_v2s16_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GCN-NEXT: ; return to shader part epilog
+ %fsub = fsub <2 x half> %a, %b
+ ret <2 x half> %fsub
+}
+
+define amdgpu_ps <2 x float> @fsub_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+; GFX11-LABEL: fsub_v2s32_uniform:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_sub_f32_e64 v0, s0, s2
+; GFX11-NEXT: v_sub_f32_e64 v1, s1, s3
+; GFX11-NEXT: ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_v2s32_uniform:
+; GFX12: ; %bb.0:
+; GFX12-NEXT: s_sub_f32 s0, s0, s2
+; GFX12-NEXT: s_sub_f32 s1, s1, s3
+; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: ; return to shader part epilog
+ %fsub = fsub <2 x float> %a, %b
+ ret <2 x float> %fsub
+}
+
+define amdgpu_ps <2 x float> @fsub_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GCN-LABEL: fsub_v2s32_div:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
+; GCN-NEXT: ; return to shader part epilog
+ %fsub = fsub <2 x float> %a, %b
+ ret <2 x float> %fsub
+}
+
+define amdgpu_ps float @fsub_fneg_s32(float %a, float %b) {
+; GCN-LABEL: fsub_fneg_s32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_add_f32_e32 v0, v0, v1
+; GCN-NEXT: ; return to shader part epilog
+ %neg = fneg float %b
+ %result = fsub float %a, %neg
+ ret float %result
+}
+
+define amdgpu_ps float @fneg_fsub_s32(float %a, float %b) {
+; GCN-LABEL: fneg_fsub_s32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_sub_f32_e64 v0, -v0, v1
+; GCN-NEXT: ; return to shader part epilog
+ %neg = fneg float %a
+ %result = fsub float %neg, %b
+ ret float %result
+}
+
+define amdgpu_ps float @fsub_fabs_s32(float %a, float %b) {
+; GCN-LABEL: fsub_fabs_s32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_sub_f32_e64 v0, v0, |v1|
+; GCN-NEXT: ; return to shader part epilog
+ %abs = call float @llvm.fabs.f32(float %b)
+ %result = fsub float %a, %abs
+ ret float %result
+}
+
+define amdgpu_ps float @fabs_fsub_s32(float %a, float %b) {
+; GCN-LABEL: fabs_fsub_s32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_sub_f32_e64 v0, |v0|, v1
+; GCN-NEXT: ; return to shader part epilog
+ %abs = call float @llvm.fabs.f32(float %a)
+ %result = fsub float %abs, %b
+ ret float %result
+}
+
+define amdgpu_ps float @fsub_fneg_fabs_s32(float %a, float %b) {
+; GCN-LABEL: fsub_fneg_fabs_s32:
+; GCN: ; %bb.0:
+; GCN-NEXT: v_add_f32_e64 v0, v0, |v1|
+; GCN-NEXT: ; return to shader part epilog
+ %abs = call float @llvm.fabs.f32(float %b)
+ %neg = fneg float %abs
+ %result = fsub float %a, %neg
+ ret float %result
+}
+
+declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
index 33b39180d22ea..392bb486a80e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
---
name: fsub_ss
diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
index 85286841cbcac..2629fb3ff0a73 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -2,6 +2,8 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+; TODO: Switch test to use -new-reg-bank-select after adding G_FCANONICALIZE support.
+
; Test that fneg is folded into source modifiers when it wasn't
; possible to fold fsub to fneg without context.
|
petar-avramovic
approved these changes
Dec 10, 2025
arsenm
reviewed
Dec 10, 2025
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.