Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
hasSALUFloat)
.Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

// G_FSUB register-bank rules for 16-bit and 32-bit scalars.
// Divergent values are mapped unconditionally to VGPR operands. Uniform
// values use SGPR operands when hasSALUFloat is set; otherwise they take VGPR
// operands with a UniInVgprS16/UniInVgprS32 result mapping.
// NOTE(review): there are no S64 or V2S16 entries here; presumably those
// types no longer appear as G_FSUB by the time these rules apply -- confirm.
addRulesForGOpcs({G_FSUB}, Standard)
.Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
.Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
.Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
.Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
.Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
.Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

// FNEG and FABS are either folded as source modifiers or can be selected as
// bitwise XOR and AND with Mask. XOR and AND are available on SALU but for
// targets without SALU float we still select them as VGPR since there would
Expand Down
220 changes: 220 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fsub.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-FAKE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-TRUE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=-real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-FAKE16 %s
; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mattr=+real-true16 -mcpu=gfx1200 -o - %s | FileCheck -check-prefixes=GCN,GFX12,GFX12-TRUE16 %s

; Uniform half fsub: both operands are passed inreg.
; The gfx1100 runs expect a VALU v_sub_f16 reading s0/s1 directly; the gfx1200
; runs expect s_sub_f16 followed by a copy of the result into v0.
define amdgpu_ps half @fsub_s16_uniform(half inreg %a, half inreg %b) {
; GFX11-FAKE16-LABEL: fsub_s16_uniform:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: v_sub_f16_e64 v0, s0, s1
; GFX11-FAKE16-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: fsub_s16_uniform:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: v_sub_f16_e64 v0.l, s0, s1
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fsub_s16_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_sub_f16 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%fsub = fsub half %a, %b
ret half %fsub
}

; Divergent half fsub: operands are not inreg.
; All four RUN configurations expect a single v_sub_f16; the +real-true16 runs
; address the 16-bit register halves (v0.l, v1.l).
define amdgpu_ps half @fsub_s16_div(half %a, half %b) {
; GFX11-FAKE16-LABEL: fsub_s16_div:
; GFX11-FAKE16: ; %bb.0:
; GFX11-FAKE16-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX11-FAKE16-NEXT: ; return to shader part epilog
;
; GFX11-TRUE16-LABEL: fsub_s16_div:
; GFX11-TRUE16: ; %bb.0:
; GFX11-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l
; GFX11-TRUE16-NEXT: ; return to shader part epilog
;
; GFX12-FAKE16-LABEL: fsub_s16_div:
; GFX12-FAKE16: ; %bb.0:
; GFX12-FAKE16-NEXT: v_sub_f16_e32 v0, v0, v1
; GFX12-FAKE16-NEXT: ; return to shader part epilog
;
; GFX12-TRUE16-LABEL: fsub_s16_div:
; GFX12-TRUE16: ; %bb.0:
; GFX12-TRUE16-NEXT: v_sub_f16_e32 v0.l, v0.l, v1.l
; GFX12-TRUE16-NEXT: ; return to shader part epilog
%fsub = fsub half %a, %b
ret half %fsub
}

; Uniform float fsub: both operands are passed inreg.
; The gfx1100 runs expect v_sub_f32 reading s0/s1; the gfx1200 runs expect
; s_sub_f32 followed by a copy of the result into v0.
define amdgpu_ps float @fsub_s32_uniform(float inreg %a, float inreg %b) {
; GFX11-LABEL: fsub_s32_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_sub_f32_e64 v0, s0, s1
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fsub_s32_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_sub_f32 s0, s0, s1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%fsub = fsub float %a, %b
ret float %fsub
}

; Divergent float fsub: every RUN configuration shares the same expectation,
; a single v_sub_f32 on v0 and v1.
define amdgpu_ps float @fsub_s32_div(float %a, float %b) {
; GCN-LABEL: fsub_s32_div:
; GCN: ; %bb.0:
; GCN-NEXT: v_sub_f32_e32 v0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%fsub = fsub float %a, %b
ret float %fsub
}

; Uniform double fsub (inreg operands); the result is stored through %ptr.
; Both targets expect a VALU v_add_f64 with the second source negated rather
; than a dedicated subtract instruction.
define amdgpu_ps void @fsub_s64_uniform(double inreg %a, double inreg %b, ptr addrspace(1) %ptr) {
; GFX11-LABEL: fsub_s64_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f64 v[2:3], s[0:1], -s[2:3]
; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fsub_s64_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_add_f64_e64 v[2:3], s[0:1], -s[2:3]
; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off
; GFX12-NEXT: s_endpgm
%fsub = fsub double %a, %b
store double %fsub, ptr addrspace(1) %ptr
ret void
}

; Divergent double fsub; the result is stored through %ptr.
; Both targets expect v_add_f64 with the second source negated.
define amdgpu_ps void @fsub_s64_div(double %a, double %b, ptr addrspace(1) %ptr) {
; GFX11-LABEL: fsub_s64_div:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3]
; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off
; GFX11-NEXT: s_endpgm
;
; GFX12-LABEL: fsub_s64_div:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_add_f64_e64 v[0:1], v[0:1], -v[2:3]
; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off
; GFX12-NEXT: s_endpgm
%fsub = fsub double %a, %b
store double %fsub, ptr addrspace(1) %ptr
ret void
}

; Uniform <2 x half> fsub (inreg operands).
; The gfx1100 runs expect one v_pk_add_f16 with neg_lo/neg_hi set on the second
; source. The gfx1200 runs expect a scalar sequence: split out the high halves
; with shifts, flip bit 15 of each %b half with s_xor 0x8000, add the halves
; with two s_add_f16, then repack with s_pack_ll_b32_b16.
define amdgpu_ps <2 x half> @fsub_v2s16_uniform(<2 x half> inreg %a, <2 x half> inreg %b) {
; GFX11-LABEL: fsub_v2s16_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_pk_add_f16 v0, s0, s1 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fsub_v2s16_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_lshr_b32 s2, s1, 16
; GFX12-NEXT: s_xor_b32 s1, s1, 0x8000
; GFX12-NEXT: s_xor_b32 s2, s2, 0x8000
; GFX12-NEXT: s_lshr_b32 s3, s0, 16
; GFX12-NEXT: s_add_f16 s0, s0, s1
; GFX12-NEXT: s_add_f16 s1, s3, s2
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s1
; GFX12-NEXT: v_mov_b32_e32 v0, s0
; GFX12-NEXT: ; return to shader part epilog
%fsub = fsub <2 x half> %a, %b
ret <2 x half> %fsub
}

; Divergent <2 x half> fsub: every RUN configuration expects one packed
; v_pk_add_f16 with neg_lo/neg_hi set on the second source.
define amdgpu_ps <2 x half> @fsub_v2s16_div(<2 x half> %a, <2 x half> %b) {
; GCN-LABEL: fsub_v2s16_div:
; GCN: ; %bb.0:
; GCN-NEXT: v_pk_add_f16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GCN-NEXT: ; return to shader part epilog
%fsub = fsub <2 x half> %a, %b
ret <2 x half> %fsub
}

; Uniform <2 x float> fsub (inreg operands), expected to be handled per element.
; The gfx1100 runs expect two v_sub_f32 reading SGPR sources; the gfx1200 runs
; expect two s_sub_f32 followed by copies of both results into v0/v1.
define amdgpu_ps <2 x float> @fsub_v2s32_uniform(<2 x float> inreg %a, <2 x float> inreg %b) {
; GFX11-LABEL: fsub_v2s32_uniform:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_sub_f32_e64 v0, s0, s2
; GFX11-NEXT: v_sub_f32_e64 v1, s1, s3
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: fsub_v2s32_uniform:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_sub_f32 s0, s0, s2
; GFX12-NEXT: s_sub_f32 s1, s1, s3
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: ; return to shader part epilog
%fsub = fsub <2 x float> %a, %b
ret <2 x float> %fsub
}

; Divergent <2 x float> fsub: every RUN configuration expects the two element
; subtractions to be issued as one dual-issue v_dual_sub_f32 pair.
define amdgpu_ps <2 x float> @fsub_v2s32_div(<2 x float> %a, <2 x float> %b) {
; GCN-LABEL: fsub_v2s32_div:
; GCN: ; %bb.0:
; GCN-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
; GCN-NEXT: ; return to shader part epilog
%fsub = fsub <2 x float> %a, %b
ret <2 x float> %fsub
}

; a - (-b): the checks expect the fneg and the subtraction to combine into a
; plain v_add_f32 with no source modifier.
define amdgpu_ps float @fsub_fneg_s32(float %a, float %b) {
; GCN-LABEL: fsub_fneg_s32:
; GCN: ; %bb.0:
; GCN-NEXT: v_add_f32_e32 v0, v0, v1
; GCN-NEXT: ; return to shader part epilog
%neg = fneg float %b
%result = fsub float %a, %neg
ret float %result
}

; (-a) - b: the checks expect the fneg to be folded into v_sub_f32 as a negate
; source modifier on the first operand.
define amdgpu_ps float @fneg_fsub_s32(float %a, float %b) {
; GCN-LABEL: fneg_fsub_s32:
; GCN: ; %bb.0:
; GCN-NEXT: v_sub_f32_e64 v0, -v0, v1
; GCN-NEXT: ; return to shader part epilog
%neg = fneg float %a
%result = fsub float %neg, %b
ret float %result
}

; a - |b|: the checks expect the fabs call to be folded into v_sub_f32 as an
; absolute-value source modifier on the second operand.
define amdgpu_ps float @fsub_fabs_s32(float %a, float %b) {
; GCN-LABEL: fsub_fabs_s32:
; GCN: ; %bb.0:
; GCN-NEXT: v_sub_f32_e64 v0, v0, |v1|
; GCN-NEXT: ; return to shader part epilog
%abs = call float @llvm.fabs.f32(float %b)
%result = fsub float %a, %abs
ret float %result
}

; |a| - b: the checks expect the fabs call to be folded into v_sub_f32 as an
; absolute-value source modifier on the first operand.
define amdgpu_ps float @fabs_fsub_s32(float %a, float %b) {
; GCN-LABEL: fabs_fsub_s32:
; GCN: ; %bb.0:
; GCN-NEXT: v_sub_f32_e64 v0, |v0|, v1
; GCN-NEXT: ; return to shader part epilog
%abs = call float @llvm.fabs.f32(float %a)
%result = fsub float %abs, %b
ret float %result
}

; a - (-|b|): the checks expect the fneg to cancel against the subtraction,
; leaving a v_add_f32 with only the absolute-value modifier on the second operand.
define amdgpu_ps float @fsub_fneg_fabs_s32(float %a, float %b) {
; GCN-LABEL: fsub_fneg_fabs_s32:
; GCN: ; %bb.0:
; GCN-NEXT: v_add_f32_e64 v0, v0, |v1|
; GCN-NEXT: ; return to shader part epilog
%abs = call float @llvm.fabs.f32(float %b)
%neg = fneg float %abs
%result = fsub float %a, %neg
ret float %result
}

declare float @llvm.fabs.f32(float)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-fsub.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s

---
name: fsub_ss
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,GISEL %s

; TODO: Switch test to use -new-reg-bank-select after adding G_FCANONICALIZE support.

; Test that fneg is folded into source modifiers when it wasn't
; possible to fold fsub to fneg without context.

Expand Down