Skip to content

Commit 03118af

Browse files
committed
[AArch64] Reorder Comparison Trees to Facilitate CSE
The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. The implementation before this change checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because we can then use the same instruction for the `SUB` node and the comparison. To achieve this, this commit comprises the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so), we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes #149685.
1 parent 39b54ba commit 03118af

File tree

2 files changed

+44
-29
lines changed

2 files changed

+44
-29
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3874,22 +3874,29 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
38743874
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
38753875
/// cannot do the negation naturally. We are required to
38763876
/// emit the subtree first in this case.
3877+
/// \param PreferFirst Set to true if processing this subtree first may
3878+
/// result in more efficient code.
38773879
/// \param WillNegate Is true if are called when the result of this
38783880
/// subexpression must be negated. This happens when the
38793881
/// outer expression is an OR. We can use this fact to know
38803882
/// that we have a double negation (or (or ...) ...) that
38813883
/// can be implemented for free.
3882-
static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
3883-
bool &MustBeFirst, bool WillNegate,
3884+
static bool canEmitConjunction(SelectionDAG& DAG, const SDValue Val, bool &CanNegate,
3885+
bool &MustBeFirst, bool &PreferFirst, bool WillNegate,
38843886
unsigned Depth = 0) {
38853887
if (!Val.hasOneUse())
38863888
return false;
38873889
unsigned Opcode = Val->getOpcode();
38883890
if (Opcode == ISD::SETCC) {
3889-
if (Val->getOperand(0).getValueType() == MVT::f128)
3891+
EVT VT = Val->getOperand(0).getValueType();
3892+
if (VT == MVT::f128)
38903893
return false;
38913894
CanNegate = true;
38923895
MustBeFirst = false;
3896+
// Designate this operation as a preferred first operation if the result
3897+
// of a SUB operation can be reused.
3898+
PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
3899+
{Val->getOperand(0), Val->getOperand(1)});
38933900
return true;
38943901
}
38953902
// Protect against exponential runtime and stack overflow.
@@ -3901,11 +3908,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
39013908
SDValue O1 = Val->getOperand(1);
39023909
bool CanNegateL;
39033910
bool MustBeFirstL;
3904-
if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
3911+
bool PreferFirstL;
3912+
if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
3913+
IsOR, Depth + 1))
39053914
return false;
39063915
bool CanNegateR;
39073916
bool MustBeFirstR;
3908-
if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
3917+
bool PreferFirstR;
3918+
if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
3919+
IsOR, Depth + 1))
39093920
return false;
39103921

39113922
if (MustBeFirstL && MustBeFirstR)
@@ -3928,6 +3939,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
39283939
CanNegate = false;
39293940
MustBeFirst = MustBeFirstL || MustBeFirstR;
39303941
}
3942+
PreferFirst = PreferFirstL || PreferFirstR;
39313943
return true;
39323944
}
39333945
return false;
@@ -3989,19 +4001,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
39894001
SDValue LHS = Val->getOperand(0);
39904002
bool CanNegateL;
39914003
bool MustBeFirstL;
3992-
bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
4004+
bool PreferFirstL;
4005+
bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
4006+
PreferFirstL, IsOR);
39934007
assert(ValidL && "Valid conjunction/disjunction tree");
39944008
(void)ValidL;
39954009

39964010
SDValue RHS = Val->getOperand(1);
39974011
bool CanNegateR;
39984012
bool MustBeFirstR;
3999-
bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
4013+
bool PreferFirstR;
4014+
bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
4015+
PreferFirstR, IsOR);
40004016
assert(ValidR && "Valid conjunction/disjunction tree");
40014017
(void)ValidR;
40024018

4003-
// Swap sub-tree that must come first to the right side.
4004-
if (MustBeFirstL) {
4019+
bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
4020+
4021+
// Swap sub-tree that must or should come first to the right side.
4022+
if (MustBeFirstL || ShouldFirstL) {
40054023
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
40064024
std::swap(LHS, RHS);
40074025
std::swap(CanNegateL, CanNegateR);
@@ -4057,7 +4075,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
40574075
AArch64CC::CondCode &OutCC) {
40584076
bool DummyCanNegate;
40594077
bool DummyMustBeFirst;
4060-
if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
4078+
bool DummyPreferFirst;
4079+
if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
4080+
DummyPreferFirst, false))
40614081
return SDValue();
40624082

40634083
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);

llvm/test/CodeGen/AArch64/ccmp-cse.ll

Lines changed: 14 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,9 @@
44
define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
55
; CHECK-LABEL: test_single_or:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: cmp x2, x0
8-
; CHECK-NEXT: sub x8, x2, x1
9-
; CHECK-NEXT: ccmp x2, x1, #0, ls
10-
; CHECK-NEXT: csel x0, xzr, x8, lo
7+
; CHECK-NEXT: subs x8, x2, x1
8+
; CHECK-NEXT: ccmp x2, x0, #2, hs
9+
; CHECK-NEXT: csel x0, xzr, x8, hi
1110
; CHECK-NEXT: ret
1211
%cmp.match = icmp ult i64 %y, %x
1312
%cmp.nomatch = icmp ugt i64 %y, %unrelated
@@ -20,11 +19,10 @@ define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
2019
define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
2120
; CHECK-LABEL: test_two_ors:
2221
; CHECK: // %bb.0:
23-
; CHECK-NEXT: cmp x2, x0
24-
; CHECK-NEXT: sub x8, x2, x1
25-
; CHECK-NEXT: ccmp x2, x1, #0, ls
22+
; CHECK-NEXT: subs x8, x2, x1
2623
; CHECK-NEXT: ccmp x0, x1, #0, hs
27-
; CHECK-NEXT: csel x0, xzr, x8, lo
24+
; CHECK-NEXT: ccmp x2, x0, #2, hs
25+
; CHECK-NEXT: csel x0, xzr, x8, hi
2826
; CHECK-NEXT: ret
2927
%cmp.match = icmp ult i64 %y, %x
3028
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
@@ -39,11 +37,10 @@ define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
3937
define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
4038
; CHECK-LABEL: test_two_ors_commuted:
4139
; CHECK: // %bb.0:
42-
; CHECK-NEXT: cmp x2, x0
43-
; CHECK-NEXT: sub x8, x2, x1
44-
; CHECK-NEXT: ccmp x2, x1, #0, ls
40+
; CHECK-NEXT: subs x8, x2, x1
4541
; CHECK-NEXT: ccmp x0, x1, #0, hs
46-
; CHECK-NEXT: csel x0, xzr, x8, lo
42+
; CHECK-NEXT: ccmp x2, x0, #2, hs
43+
; CHECK-NEXT: csel x0, xzr, x8, hi
4744
; CHECK-NEXT: ret
4845
%cmp.match = icmp ult i64 %y, %x
4946
%cmp.nomatch1 = icmp ult i64 %unrelated, %x
@@ -58,10 +55,9 @@ define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
5855
define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
5956
; CHECK-LABEL: test_single_and:
6057
; CHECK: // %bb.0:
61-
; CHECK-NEXT: cmp x2, x0
62-
; CHECK-NEXT: sub x8, x2, x1
63-
; CHECK-NEXT: ccmp x2, x1, #2, hi
64-
; CHECK-NEXT: csel x0, xzr, x8, lo
58+
; CHECK-NEXT: subs x8, x2, x1
59+
; CHECK-NEXT: ccmp x2, x0, #0, lo
60+
; CHECK-NEXT: csel x0, xzr, x8, hi
6561
; CHECK-NEXT: ret
6662
%cmp.match = icmp ult i64 %y, %x
6763
%cmp.nomatch = icmp ugt i64 %y, %unrelated
@@ -74,9 +70,8 @@ define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
7470
define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
7571
; CHECK-LABEL: test_single_or_sub_commuted:
7672
; CHECK: // %bb.0:
77-
; CHECK-NEXT: cmp x2, x0
78-
; CHECK-NEXT: sub x8, x1, x2
79-
; CHECK-NEXT: ccmp x1, x2, #2, ls
73+
; CHECK-NEXT: subs x8, x1, x2
74+
; CHECK-NEXT: ccmp x2, x0, #2, ls
8075
; CHECK-NEXT: csel x0, xzr, x8, hi
8176
; CHECK-NEXT: ret
8277
%cmp.match = icmp ult i64 %y, %x

0 commit comments

Comments
 (0)