-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64] Reorder Comparison Trees to Facilitate CSE #168064
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Reorder Comparison Trees to Facilitate CSE #168064
Conversation
|
@llvm/pr-subscribers-backend-aarch64 Author: Marius Kamp (mskamp) ChangesThe AArch64 backend converts trees formed by conjunctions/disjunctions This may not be optimal if there is a corresponding To achieve this, this commit comprises the following changes:
Closes #149685. Full diff: https://github.com/llvm/llvm-project/pull/168064.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8aea0d23ffc0a..7f55d579b5fde 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3874,22 +3874,29 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
+/// \param PreferFirst Set to true if processing this subtree first may
+/// result in more efficient code.
/// \param WillNegate Is true if are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
-static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
- bool &MustBeFirst, bool WillNegate,
+static bool canEmitConjunction(SelectionDAG& DAG, const SDValue Val, bool &CanNegate,
+ bool &MustBeFirst, bool &PreferFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
- if (Val->getOperand(0).getValueType() == MVT::f128)
+ EVT VT = Val->getOperand(0).getValueType();
+ if (VT == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
+ // Designate this operation as a preferred first operation if the result
+ // of a SUB operation can be reused.
+ PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
+ {Val->getOperand(0), Val->getOperand(1)});
return true;
}
// Protect against exponential runtime and stack overflow.
@@ -3901,11 +3908,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
- if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+ bool PreferFirstL;
+ if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
+ IsOR, Depth + 1))
return false;
bool CanNegateR;
bool MustBeFirstR;
- if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+ bool PreferFirstR;
+ if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
+ IsOR, Depth + 1))
return false;
if (MustBeFirstL && MustBeFirstR)
@@ -3928,6 +3939,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
+ PreferFirst = PreferFirstL || PreferFirstR;
return true;
}
return false;
@@ -3989,19 +4001,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
- bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
+ bool PreferFirstL;
+ bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
+ PreferFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
- bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
+ bool PreferFirstR;
+ bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
+ PreferFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
- // Swap sub-tree that must come first to the right side.
- if (MustBeFirstL) {
+ bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
+
+ // Swap sub-tree that must or should come first to the right side.
+ if (MustBeFirstL || ShouldFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
@@ -4057,7 +4075,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
- if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
+ bool DummyPreferFirst;
+ if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
+ DummyPreferFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
new file mode 100644
index 0000000000000..657498172a04c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x2, x0, #2, hs
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch = icmp ugt i64 %y, %unrelated
+ %or.cond = or i1 %cmp.match, %cmp.nomatch
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x0, x1, #0, hs
+; CHECK-NEXT: ccmp x2, x0, #2, hs
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+ %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+ %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %or.cond = or i1 %cmp.match, %or.nomatch
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x0, x1, #0, hs
+; CHECK-NEXT: ccmp x2, x0, #2, hs
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+ %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+ %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %or.cond = or i1 %or.nomatch, %cmp.match
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_and:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x2, x0, #0, lo
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch = icmp ugt i64 %y, %unrelated
+ %and.cond = and i1 %cmp.match, %cmp.nomatch
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or_sub_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x1, x2
+; CHECK-NEXT: ccmp x2, x0, #2, ls
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch = icmp ugt i64 %y, %unrelated
+ %or.cond = or i1 %cmp.match, %cmp.nomatch
+ %sub.reuse = sub nuw i64 %x, %y
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+; Negative test: We must negate the or operation, hence this must come first.
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x2, x0
+; CHECK-NEXT: sub x8, x2, x1
+; CHECK-NEXT: ccmp x0, x1, #0, ls
+; CHECK-NEXT: ccmp x2, x1, #2, lo
+; CHECK-NEXT: csel x0, xzr, x8, lo
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+ %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+ %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %and.cond = and i1 %or.nomatch, %cmp.match
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+; Negative test: There is no analogue of SUBS for floating point.
+define float @test_negative_float(float %unrelated, float %x, float %y) nounwind {
+; CHECK-LABEL: test_negative_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s2, s0
+; CHECK-NEXT: fsub s0, s2, s1
+; CHECK-NEXT: movi d3, #0000000000000000
+; CHECK-NEXT: fccmp s2, s1, #8, le
+; CHECK-NEXT: fcsel s0, s3, s0, mi
+; CHECK-NEXT: ret
+ %cmp.nomatch1 = fcmp olt float %y, %x
+ %cmp.nomatch2 = fcmp ogt float %y, %unrelated
+ %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %sub.noreuse = fsub float %y, %x
+ %res = select i1 %or.cond, float 0.0, float %sub.noreuse
+ ret float %res
+}
+
+; Negative test: If both operands match a sub, do not reorder them.
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind {
+; CHECK-LABEL: test_prefer_right_negative:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x2, x0
+; CHECK-NEXT: ccmp x2, x1, #0, ls
+; CHECK-NEXT: csel x8, x0, x1, lo
+; CHECK-NEXT: sub x0, x2, x8
+; CHECK-NEXT: ret
+ %cmp.match1 = icmp ult i64 %z, %y
+ %cmp.match2 = icmp ugt i64 %z, %x
+ %or.cond = or i1 %cmp.match1, %cmp.match2
+ %sub.reuse1 = sub nuw i64 %z, %y
+ %sub.reuse2 = sub nuw i64 %z, %x
+ %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1
+ ret i64 %res
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. The implementation before this change checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because we can then use the same instruction for the `SUB` node and the comparison. To achieve this, this commit comprises the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so), we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes llvm#149685.
03118af to
282d182
Compare
davemgreen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was a little sceptical as sometimes it is better to perform lots of little transforms, canEmitConjunction/emitConjunctionRec tries to do everything all at once. But this appears to work OK.
LGTM.
|
@davemgreen Thank you very much for your review of this PR. Since I don't have commit access, would you mind merging this when you have a chance? |
The AArch64 backend converts trees formed by conjunctions/disjunctions
of comparisons into sequences of
CCMPinstructions. The implementationbefore this change checks whether a sub-tree must be processed first. If
not, it processes the operations in the order they occur in the DAG.
This may not be optimal if there is a corresponding
SUBnode for oneof the comparisons. In this case, we should process this comparison
first because we can then use the same instruction for the
SUBnodeand the comparison.
To achieve this, this commit comprises the following changes:
canEmitConjunctionwith a new output parameterPreferFirst,which reports to the caller whether the sub-tree should preferably be
processed first.
PreferFirsttotrueif we can find a correspondingSUBnodein the DAG.
PreferFirst = truefirst (i.e., wedo not violate any
MustBeFirstconstraint by doing so), we swap thesub-trees.
elimination takes care to use only a single instruction for the
comparison and the
SUBnode if possible.Closes #149685.