Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 10 additions & 14 deletions llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,18 +251,15 @@ struct HistogramInfo {
/// induction variable and the different reduction variables.
class LoopVectorizationLegality {
public:
LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
DominatorTree *DT, TargetTransformInfo *TTI,
TargetLibraryInfo *TLI, Function *F,
LoopAccessInfoManager &LAIs, LoopInfo *LI,
OptimizationRemarkEmitter *ORE,
LoopVectorizationRequirements *R,
LoopVectorizeHints *H, DemandedBits *DB,
AssumptionCache *AC, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, AAResults *AA)
LoopVectorizationLegality(
Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F,
LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE,
LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
AssumptionCache *AC, bool AllowRuntimeSCEVChecks, AAResults *AA)
: TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI),
AA(AA) {}
ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
AllowRuntimeSCEVChecks(AllowRuntimeSCEVChecks), AA(AA) {}

/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
Expand Down Expand Up @@ -720,9 +717,8 @@ class LoopVectorizationLegality {
/// Hold potentially faulting loads.
SmallPtrSet<const Instruction *, 4> PotentiallyFaultingLoads;

/// BFI and PSI are used to check for profile guided size optimizations.
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
/// Whether or not creating SCEV predicates is allowed.
bool AllowRuntimeSCEVChecks;

// Alias Analysis results used to check for possible aliasing with loads
// used in uncountable exit conditions.
Expand Down
4 changes: 1 addition & 3 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
const auto &Strides =
LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

bool CanAddPredicate = !llvm::shouldOptimizeForSize(
TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
CanAddPredicate, false)
AllowRuntimeSCEVChecks, false)
.value_or(0);
if (Stride == 1 || Stride == -1)
return Stride;
Expand Down
89 changes: 38 additions & 51 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -501,12 +501,11 @@ class InnerLoopVectorizer {
LoopInfo *LI, DominatorTree *DT,
const TargetTransformInfo *TTI, AssumptionCache *AC,
ElementCount VecWidth, unsigned UnrollFactor,
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
VPlan &Plan)
LoopVectorizationCostModel *CM,
GeneratedRTChecks &RTChecks, VPlan &Plan)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC),
VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
Cost(CM), BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan),
Cost(CM), RTChecks(RTChecks), Plan(Plan),
VectorPHVPBB(cast<VPBasicBlock>(
Plan.getVectorLoopRegion()->getSinglePredecessor())) {}

Expand Down Expand Up @@ -583,10 +582,6 @@ class InnerLoopVectorizer {
/// The profitability analysis.
LoopVectorizationCostModel *Cost;

/// BFI and PSI are used to check for profile guided size optimizations.
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;

/// Structure to hold information about generated runtime checks, responsible
/// for cleaning the checks, if vectorization turns out unprofitable.
GeneratedRTChecks &RTChecks;
Expand Down Expand Up @@ -635,11 +630,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth,
ElementCount MinProfitableTripCount, unsigned UnrollFactor)
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth,
UnrollFactor, CM, BFI, PSI, Checks, Plan),
UnrollFactor, CM, Checks, Plan),
EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {}

/// Holds and updates state information required to vectorize the main loop
Expand All @@ -665,10 +659,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
AssumptionCache *AC,
EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Check, VPlan &Plan)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
BFI, PSI, Check, Plan, EPI.MainLoopVF,
Check, Plan, EPI.MainLoopVF,
EPI.MainLoopVF, EPI.MainLoopUF) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (i.e., the first pass of VPlan execution).
Expand Down Expand Up @@ -698,14 +691,15 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
// their epilogues.
class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
public:
EpilogueVectorizerEpilogueLoop(
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &Checks, VPlan &Plan)
EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
const TargetTransformInfo *TTI,
AssumptionCache *AC,
EpilogueLoopVectorizationInfo &EPI,
LoopVectorizationCostModel *CM,
GeneratedRTChecks &Checks, VPlan &Plan)
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
BFI, PSI, Checks, Plan, EPI.EpilogueVF,
Checks, Plan, EPI.EpilogueVF,
EPI.EpilogueVF, EPI.EpilogueUF) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *epilogue loop* strategy (i.e., the second pass of VPlan execution).
Expand Down Expand Up @@ -881,18 +875,13 @@ class LoopVectorizationCostModel {
AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, const Function *F,
const LoopVectorizeHints *Hints,
InterleavedAccessInfo &IAI,
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
InterleavedAccessInfo &IAI, bool OptForSize)
: ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
Hints(Hints), InterleaveInfo(IAI) {
Hints(Hints), InterleaveInfo(IAI), OptForSize(OptForSize) {
if (TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors)
initializeVScaleForTuning();
CostKind = F->hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput;
// Query this against the original loop and save it here because the profile
// of the original loop header may change as the transformation happens.
OptForSize = llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);
}

/// \return An upper bound for the vectorization factors (both fixed and
Expand Down Expand Up @@ -9048,20 +9037,13 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
// predication, and 4) a TTI hook that analyses whether the loop is suitable
// for predication.
static ScalarEpilogueLowering getScalarEpilogueLowering(
Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
Function *F, Loop *L, LoopVectorizeHints &Hints, bool OptForSize,
TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) {
// 1) OptSize takes precedence over all other options, i.e. if this is set,
// don't look at hints or options, and don't request a scalar epilogue.
// (For PGSO, as shouldOptimizeForSize isn't currently accessible from
// LoopAccessInfo (due to code dependency and not being able to reliably get
// PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
// of strides in LoopAccessInfo::analyzeLoop() and vectorize without
// versioning when the vectorization is forced, unlike hasOptSize. So revert
// back to the old way and vectorize with versioning when forced. See D81345.)
if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
PGSOQueryType::IRPass) &&
Hints.getForce() != LoopVectorizeHints::FK_Enabled))
if (F->hasOptSize() ||
(OptForSize && Hints.getForce() != LoopVectorizeHints::FK_Enabled))
return CM_ScalarEpilogueNotAllowedOptSize;

// 2) If set, obey the directives
Expand Down Expand Up @@ -9100,8 +9082,7 @@ static bool processLoopInVPlanNativePath(
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints,
OptimizationRemarkEmitter *ORE, bool OptForSize, LoopVectorizeHints &Hints,
LoopVectorizationRequirements &Requirements) {

if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
Expand All @@ -9113,10 +9094,10 @@ static bool processLoopInVPlanNativePath(
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());

ScalarEpilogueLowering SEL =
getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, *LVL, &IAI);
getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, *LVL, &IAI);

LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints, IAI, PSI, BFI);
&Hints, IAI, OptForSize);
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
Expand All @@ -9142,7 +9123,7 @@ static bool processLoopInVPlanNativePath(
{
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM,
BFI, PSI, Checks, BestPlan);
Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1,
Expand Down Expand Up @@ -9803,10 +9784,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {

PredicatedScalarEvolution PSE(*SE, *L);

// Query this against the original loop and save it here because the profile
// of the original loop header may change as the transformation happens.
bool OptForSize = llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
PGSOQueryType::IRPass);

// Check if it is legal to vectorize the loop.
LoopVectorizationRequirements Requirements;
LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE,
&Requirements, &Hints, DB, AC, BFI, PSI, AA);
&Requirements, &Hints, DB, AC,
/*AllowRuntimeSCEVChecks=*/!OptForSize, AA);
if (!LVL.canVectorize(EnableVPlanNativePath)) {
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
Hints.emitRemarkWithHints();
Expand Down Expand Up @@ -9834,7 +9821,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// pipeline.
if (!L->isInnermost())
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
ORE, BFI, PSI, Hints, Requirements);
ORE, OptForSize, Hints, Requirements);

assert(L->isInnermost() && "Inner loop expected.");

Expand Down Expand Up @@ -9864,7 +9851,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the function attributes and profiles to find out if this function
// should be optimized for size.
ScalarEpilogueLowering SEL =
getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, LVL, &IAI);
getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, LVL, &IAI);

// Check the loop for a trip count threshold: vectorize loops with a tiny trip
// count by optimizing for size, to minimize overheads.
Expand Down Expand Up @@ -9937,7 +9924,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {

// Use the cost model.
LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
F, &Hints, IAI, PSI, BFI);
F, &Hints, IAI, OptForSize);
// Use the planner for vectorization.
LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
ORE);
Expand Down Expand Up @@ -10139,16 +10126,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
BestEpiPlan);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI,
PSI, Checks, *BestMainPlan);
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
Checks, *BestMainPlan);
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
*BestMainPlan, MainILV, DT, false);
++LoopsVectorized;

// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
BFI, PSI, Checks, BestEpiPlan);
Checks, BestEpiPlan);
SmallVector<Instruction *> InstsToMove = preparePlanForEpilogueVectorLoop(
BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE());
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
Expand All @@ -10157,8 +10144,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
Checks, InstsToMove);
++LoopsEpilogueVectorized;
} else {
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI, PSI,
Checks, BestPlan);
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, Checks,
BestPlan);
// TODO: Move to general VPlan pipeline once epilogue loops are also
// supported.
VPlanTransforms::runPass(
Expand Down
Loading