Skip to content

Commit 11e457c

Browse files
authored
[X86] Use vectorized i256 bit counts when we know the source originated from the vector unit (#171589)
Currently we only permit i256 CTTZ/CTLZ AVX512 lowering when the source is loadable, as GPR->FPU transition costs would otherwise outweigh the vectorization benefit. This patch checks for other cases where the source can avoid the GPR: a mayFoldIntoVector helper checks for a bitcast originally from a vector type, as well as constant values and the original mayFoldLoad check. There will be other cases for the mayFoldIntoVector helper, but I've just used this for CTTZ/CTLZ initially.
1 parent 9339601 commit 11e457c

File tree

2 files changed

+114
-229
lines changed

2 files changed

+114
-229
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2846,6 +2846,15 @@ bool X86::mayFoldIntoZeroExtend(SDValue Op) {
28462846
return false;
28472847
}
28482848

2849+
// Return true if it's cheap to bitcast this to a vector type.
2850+
static bool mayFoldIntoVector(SDValue Op, const X86Subtarget &Subtarget) {
2851+
if (peekThroughBitcasts(Op).getValueType().isVector())
2852+
return true;
2853+
if (isa<ConstantSDNode>(Op) || isa<ConstantFPSDNode>(Op))
2854+
return true;
2855+
return X86::mayFoldLoad(Op, Subtarget);
2856+
}
2857+
28492858
static bool isLogicOp(unsigned Opcode) {
28502859
// TODO: Add support for X86ISD::FAND/FOR/FXOR/FANDN with test coverage.
28512860
return ISD::isBitwiseLogicOp(Opcode) || X86ISD::ANDNP == Opcode;
@@ -33958,7 +33967,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3395833967
EVT VT = N->getValueType(0);
3395933968
assert(Subtarget.hasCDI() && "AVX512CD required");
3396033969
assert((VT == MVT::i256 || VT == MVT::i512) && "Unexpected VT!");
33961-
if (VT == MVT::i256 && !X86::mayFoldLoad(N0, Subtarget))
33970+
if (VT == MVT::i256 && !mayFoldIntoVector(N0, Subtarget))
3396233971
return;
3396333972

3396433973
unsigned SizeInBits = VT.getSizeInBits();

0 commit comments

Comments
 (0)