diff --git a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h index b6bdfb1275c92..40c87b99af6a2 100644 --- a/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h +++ b/llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h @@ -52,11 +52,14 @@ class ICallPromotionAnalysis { /// The \p TotalCount and \p NumCandidates are set to the the total profile /// count of the indirect call \p I and the number of profitable candidates /// in the given array (which is sorted in reverse order of profitability). + /// The value of \p MaxNumValueData can be used to override the max set + /// from the -icp-max-prom option with a larger value. /// /// The returned array space is owned by this class, and overwritten on /// subsequent calls. MutableArrayRef getPromotionCandidatesForInstruction( - const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates); + const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates, + unsigned MaxNumValueData = 0); }; } // end namespace llvm diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index 25e7a97065b27..6dc03bcfdf49c 100644 --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -93,9 +93,14 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates( MutableArrayRef ICallPromotionAnalysis::getPromotionCandidatesForInstruction( - const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) { + const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates, + unsigned MaxNumValueData) { + // Use the max of the values specified by -icp-max-prom and the provided + // MaxNumValueData parameter. + if (MaxNumPromotions > MaxNumValueData) + MaxNumValueData = MaxNumPromotions; ValueDataArray = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, - MaxNumPromotions, TotalCount); + MaxNumValueData, TotalCount); if (ValueDataArray.empty()) { NumCandidates = 0; return MutableArrayRef(); diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 6cdf51a148e2d..382e60b04f6f4 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -85,6 +85,15 @@ static cl::opt EnableMemProfIndirectCallSupport( cl::desc( "Enable MemProf support for summarizing and cloning indirect calls")); +// This can be used to override the number of callees created from VP metadata +// normally taken from the -icp-max-prom option with a larger amount, if useful +// for analysis. +cl::opt + MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0), + cl::Hidden, + cl::desc("Max number of summary edges added from " + "indirect call profile metadata")); + LLVM_ABI extern cl::opt ScalePartialSampleProfileWorkingSetSize; extern cl::opt MaxNumVTableAnnotations; @@ -494,8 +503,8 @@ static void computeFunctionSummary( } CandidateProfileData = - ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount, - NumCandidates); + ICallAnalysis.getPromotionCandidatesForInstruction( + &I, TotalCount, NumCandidates, MaxSummaryIndirectEdges); for (const auto &Candidate : CandidateProfileData) CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)] .updateHotness(getHotness(Candidate.Count, PSI)); diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index 0f4bc649df720..74daaae8a9952 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -235,6 +235,8 @@ cl::opt MemProfFixupImportant( "memprof-fixup-important", cl::init(true), cl::Hidden, cl::desc("Enables edge fixup for important contexts")); +extern cl::opt MaxSummaryIndirectEdges; + } // namespace llvm namespace { @@ -6067,8 +6069,8 @@ unsigned MemProfContextDisambiguation::recordICPInfo( uint32_t NumCandidates; uint64_t TotalCount; auto CandidateProfileData = - ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount, - NumCandidates); + ICallAnalysis->getPromotionCandidatesForInstruction( + CB, TotalCount, NumCandidates, MaxSummaryIndirectEdges); if (CandidateProfileData.empty()) return 0; diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll index e98a2d1e6ff4d..01386bd35bf40 100644 --- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll +++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll @@ -1,38 +1,57 @@ -; Promote at most one function and annotate at most one vtable. -; As a result, only one value (of each relevant kind) shows up in the function -; summary. +;; Check that the values of -icp-max-num-vtables, -icp-max-prom, and +;; -module-summary-max-indirect-edges affect the number of profiled +;; vtables and virtual functions propagated from the VP metadata to +;; the ThinLTO summary as expected. +;; First try with a max of 1 for both vtables and virtual functions. ; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o -; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s --check-prefix=SUMMARY ; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS -; Round trip it through llvm-as +;; Round trip it through llvm-as ; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS -; CHECK: -; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction -; that loads vtable pointers. -; CHECK-NEXT: -; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the -; indirect call instruction. -; CHECK-NEXT: -; NOTE vtables and functions from Derived class is dropped because -; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one. -; has the format [valueid, flags, instcount, funcflags, -; numrefs, rorefcnt, worefcnt, -; m x valueid, -; n x (valueid, hotness+tailcall)] -; CHECK-NEXT: -; CHECK-NEXT: +;; Next check that a larger -module-summary-max-indirect-edges value overrides +;; -icp-max-prom when determining how many virtual functions to summarize. +; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 -module-summary-max-indirect-edges=2 %s -o %t2.o +; RUN: llvm-bcanalyzer -dump %t2.o | FileCheck %s --check-prefixes=SUMMARY,SUMMARY2 +; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefixes=DIS,DIS2 + +; SUMMARY: + +;; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction +;; that loads vtable pointers. +; SUMMARY-NEXT: +;; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the +;; indirect call instruction. +; SUMMARY-NEXT: +;; The `VALUE_GUID` below represents the "_ZN7Derived4funcEv" referenced by the +;; indirect call instruction. +; SUMMARY2-NEXT: + +;; has the format [valueid, flags, instcount, funcflags, +;; numrefs, rorefcnt, worefcnt, +;; m x valueid, +;; n x (valueid, hotness+tailcall)] +;; NOTE vtables and functions from Derived class are dropped in the base case +;; because `-icp-max-num-vtables` and `-icp-max-prom` are both set to one. +; SUMMARY-NEXT: +; SUMMARY-NEXT: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; Function has one BB and an entry count of 150, so the BB is hot according to -; ProfileSummary and reflected so in the bitcode (see llvm-dis output). +;; Function has one BB and an entry count of 150, so the BB is hot according to +;; ProfileSummary and reflected so in the bitcode (see llvm-dis output). define i32 @_Z4testP4Base(ptr %0) !prof !15 { %2 = load ptr, ptr %0, !prof !16 %3 = load ptr, ptr %2 @@ -58,17 +77,20 @@ define i32 @_Z4testP4Base(ptr %0) !prof !15 { !14 = !{i32 999999, i64 1, i32 2} !15 = !{!"function_entry_count", i32 150} -; 1960855528937986108 is the MD5 hash of _ZTV4Base, and -; 13870436605473471591 is the MD5 hash of _ZTV7Derived +;; 1960855528937986108 is the MD5 hash of _ZTV4Base, and +;; 13870436605473471591 is the MD5 hash of _ZTV7Derived !16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50} -; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and -; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv +;; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and +;; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv !17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50} -; ModuleSummaryIndex stores map in std::map; so -; global value summares are printed out in the order that gv's guid increases. -; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) -; DIS: ^1 = gv: (guid: 1960855528937986108) -; DIS: ^2 = gv: (guid: 5459407273543877811) -; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965 -; DIS: ^4 = blockcount: 0 +;; ModuleSummaryIndex stores map in std::map; so +;; global value summares are printed out in the order that gv's guid increases. +; DIS: ^[[VTABLEBASE2:[0-9]+]] = gv: (guid: 1960855528937986108) +; DIS: ^[[VFUNCBASE2:[0-9]+]] = gv: (guid: 5459407273543877811) +; DIS2: ^[[VFUNCDER2:[0-9]+]] = gv: (guid: 6174874150489409711) +; DIS: gv: (name: "_Z4testP4Base", {{.*}} calls: ((callee: ^[[VFUNCBASE2]], hotness: hot) +;; With -module-summary-max-indirect-edges=2 we get the Derived func. +; DIS2-SAME: (callee: ^[[VFUNCDER2]], hotness: none) +; DIS-NOT: callee +; DIS-SAME: ), refs: (readonly ^[[VTABLEBASE2]]) diff --git a/llvm/test/ThinLTO/X86/memprof-icp.ll b/llvm/test/ThinLTO/X86/memprof-icp.ll index 3a68cd89e0b16..407c1663e09f4 100644 --- a/llvm/test/ThinLTO/X86/memprof-icp.ll +++ b/llvm/test/ThinLTO/X86/memprof-icp.ll @@ -70,7 +70,9 @@ ; RUN: split-file %s %t ; RUN: opt -thinlto-bc %t/main.ll >%t/main.o -; RUN: opt -thinlto-bc %t/foo.ll >%t/foo.o +;; Check that -module-summary-max-indirect-edges correctly overrides +;; -icp-max-prom with a higher max when building summary. +; RUN: opt -thinlto-bc -icp-max-prom=1 -module-summary-max-indirect-edges=2 %t/foo.ll >%t/foo.o ;; Check that we get the synthesized callsite records. There should be 2, one ;; for each profiled target in the VP metadata. They will have the same stackIds @@ -86,6 +88,10 @@ ;; First perform in-process ThinLTO ; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \ ; RUN: -enable-memprof-indirect-call-support=true \ +;; Check that -module-summary-max-indirect-edges correctly overrides +;; -icp-max-prom with a higher max when performing memprof ICP. +; RUN: -icp-max-prom=1 \ +; RUN: -module-summary-max-indirect-edges=2 \ ; RUN: -supports-hot-cold-new \ ; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \ ; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \