Skip to content

Conversation

@vangthao95
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Dec 9, 2025

@llvm/pr-subscribers-llvm-globalisel

@llvm/pr-subscribers-backend-amdgpu

Author: None (vangthao95)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/171244.diff

4 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp (+3-1)
  • (added) llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll (+220)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll (+2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 9de309279a247..f7a54adbade45 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -934,7 +934,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
 
   bool hasSALUFloat = ST->hasSALUFloatInsts();
 
-  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
+  addRulesForGOpcs(
+      {G_FADD, G_FSUB, G_FMUL, G_STRICT_FADD, G_STRICT_FSUB, G_STRICT_FMUL},
+      Standard)
       .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
       .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
       .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll
new file mode 100644
index 0000000000000..ab3699ddc8b1b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll
@@ -0,0 +1,220 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
+; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s
+
+define amdgpu_ps half @fsub_s16_uniform(half inreg %a, half inreg %b) {
+; GFX11-FAKE16-LABEL: fsub_s16_uniform:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    v_sub_f16_e64 v0, s0, s1
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fsub_s16_uniform:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    v_sub_f16_e64 v0.l, s0, s1
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_s16_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_sub_f16 s0, s0, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
+  %fsub = fsub half %a, %b
+  ret half %fsub
+}
+
+define amdgpu_ps half @fsub_s16_div(half %a, half %b) {
+; GFX11-FAKE16-LABEL: fsub_s16_div:
+; GFX11-FAKE16:       ; %bb.0:
+; GFX11-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX11-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX11-TRUE16-LABEL: fsub_s16_div:
+; GFX11-TRUE16:       ; %bb.0:
+; GFX11-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX11-TRUE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-FAKE16-LABEL: fsub_s16_div:
+; GFX12-FAKE16:       ; %bb.0:
+; GFX12-FAKE16-NEXT:    v_sub_f16_e32 v0, v0, v1
+; GFX12-FAKE16-NEXT:    ; return to shader part epilog
+;
+; GFX12-TRUE16-LABEL: fsub_s16_div:
+; GFX12-TRUE16:       ; %bb.0:
+; GFX12-TRUE16-NEXT:    v_sub_f16_e32 v0.l, v0.l, v1.l
+; GFX12-TRUE16-NEXT:    ; return to shader part epilog
+  %fsub = fsub half %a, %b
+  ret half %fsub
+}
+
+define amdgpu_ps float @fsub_s32_uniform(float inreg %a, float inreg %b) {
+; GFX11-LABEL: fsub_s32_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_sub_f32_e64 v0, s0, s1
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_s32_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_sub_f32 s0, s0, s1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
+  %fsub = fsub float %a, %b
+  ret float %fsub
+}
+
+define amdgpu_ps float @fsub_s32_div(float %a, float %b) {
+; GCN-LABEL: fsub_s32_div:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sub_f32_e32 v0, v0, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %fsub = fsub float %a, %b
+  ret float %fsub
+}
+
+define amdgpu_ps void @fsub_s64_uniform(double inreg %a, double inreg %b, ptr addrspace(1) %ptr) {
+; GFX11-LABEL: fsub_s64_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_add_f64 v[2:3], s[0:1], -s[2:3]
+; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fsub_s64_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_add_f64_e64 v[2:3], s[0:1], -s[2:3]
+; GFX12-NEXT:    global_store_b64 v[0:1], v[2:3], off
+; GFX12-NEXT:    s_endpgm
+  %fsub = fsub double %a, %b
+  store double %fsub, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_ps void @fsub_s64_div(double %a, double %b, ptr addrspace(1) %ptr) {
+; GFX11-LABEL: fsub_s64_div:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
+; GFX11-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GFX11-NEXT:    s_endpgm
+;
+; GFX12-LABEL: fsub_s64_div:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    v_add_f64_e64 v[0:1], v[0:1], -v[2:3]
+; GFX12-NEXT:    global_store_b64 v[4:5], v[0:1], off
+; GFX12-NEXT:    s_endpgm
+  %fsub = fsub double %a, %b
+  store double %fsub, ptr addrspace(1) %ptr
+  ret void
+}
+
+define amdgpu_ps <2 x half> @fsub_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
+; GFX11-LABEL: fsub_v2s16_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_pk_add_f16 v0, s0, s1 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_v2s16_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX12-NEXT:    s_xor_b32 s1, s1, 0x8000
+; GFX12-NEXT:    s_xor_b32 s2, s2, 0x8000
+; GFX12-NEXT:    s_lshr_b32 s3, s0, 16
+; GFX12-NEXT:    s_add_f16 s0, s0, s1
+; GFX12-NEXT:    s_add_f16 s1, s3, s2
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX12-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
+; GFX12-NEXT:    v_mov_b32_e32 v0, s0
+; GFX12-NEXT:    ; return to shader part epilog
+  %fsub = fsub <2 x half> %a, %b
+  ret <2 x half> %fsub
+}
+
+define amdgpu_ps <2 x half> @fsub_v2s16_div(<2 x half> %a, <2 x half> %b) {
+; GCN-LABEL: fsub_v2s16_div:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
+; GCN-NEXT:    ; return to shader part epilog
+  %fsub = fsub <2 x half> %a, %b
+  ret <2 x half> %fsub
+}
+
+define amdgpu_ps <2 x float> @fsub_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
+; GFX11-LABEL: fsub_v2s32_uniform:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_sub_f32_e64 v0, s0, s2
+; GFX11-NEXT:    v_sub_f32_e64 v1, s1, s3
+; GFX11-NEXT:    ; return to shader part epilog
+;
+; GFX12-LABEL: fsub_v2s32_uniform:
+; GFX12:       ; %bb.0:
+; GFX12-NEXT:    s_sub_f32 s0, s0, s2
+; GFX12-NEXT:    s_sub_f32 s1, s1, s3
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_3)
+; GFX12-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT:    ; return to shader part epilog
+  %fsub = fsub <2 x float> %a, %b
+  ret <2 x float> %fsub
+}
+
+define amdgpu_ps <2 x float> @fsub_v2s32_div(<2 x float> %a, <2 x float> %b) {
+; GCN-LABEL: fsub_v2s32_div:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
+; GCN-NEXT:    ; return to shader part epilog
+  %fsub = fsub <2 x float> %a, %b
+  ret <2 x float> %fsub
+}
+
+define amdgpu_ps float @fsub_fneg_s32(float %a, float %b) {
+; GCN-LABEL: fsub_fneg_s32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_add_f32_e32 v0, v0, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %neg = fneg float %b
+  %result = fsub float %a, %neg
+  ret float %result
+}
+
+define amdgpu_ps float @fneg_fsub_s32(float %a, float %b) {
+; GCN-LABEL: fneg_fsub_s32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sub_f32_e64 v0, -v0, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %neg = fneg float %a
+  %result = fsub float %neg, %b
+  ret float %result
+}
+
+define amdgpu_ps float @fsub_fabs_s32(float %a, float %b) {
+; GCN-LABEL: fsub_fabs_s32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sub_f32_e64 v0, v0, |v1|
+; GCN-NEXT:    ; return to shader part epilog
+  %abs = call float @llvm.fabs.f32(float %b)
+  %result = fsub float %a, %abs
+  ret float %result
+}
+
+define amdgpu_ps float @fabs_fsub_s32(float %a, float %b) {
+; GCN-LABEL: fabs_fsub_s32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_sub_f32_e64 v0, |v0|, v1
+; GCN-NEXT:    ; return to shader part epilog
+  %abs = call float @llvm.fabs.f32(float %a)
+  %result = fsub float %abs, %b
+  ret float %result
+}
+
+define amdgpu_ps float @fsub_fneg_fabs_s32(float %a, float %b) {
+; GCN-LABEL: fsub_fneg_fabs_s32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    v_add_f32_e64 v0, v0, |v1|
+; GCN-NEXT:    ; return to shader part epilog
+  %abs = call float @llvm.fabs.f32(float %b)
+  %neg = fneg float %abs
+  %result = fsub float %a, %neg
+  ret float %result
+}
+
+declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
index 33b39180d22ea..392bb486a80e2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
-# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
 
 ---
 name: fsub_ss
diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
index 85286841cbcac..2629fb3ff0a73 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
 
+; TODO: Switch test to use -new-reg-bank-select after adding G_FCANONICALIZE support.
+
 ; Test that fneg is folded into source modifiers when it wasn't
 ; possible to fold fsub to fneg without context.
 

@vangthao95 vangthao95 merged commit 854ef8d into llvm:main Dec 11, 2025
10 checks passed
@vangthao95 vangthao95 deleted the globalisel-fsub branch December 11, 2025 19:55
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants