Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion llvm/include/llvm/Analysis/IndirectCallPromotionAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,14 @@ class ICallPromotionAnalysis {
/// The \p TotalCount and \p NumCandidates are set to the the total profile
/// count of the indirect call \p I and the number of profitable candidates
/// in the given array (which is sorted in reverse order of profitability).
/// The value of \p MaxNumValueData can be used to override the max set
/// from the -icp-max-prom option with a larger value.
///
/// The returned array space is owned by this class, and overwritten on
/// subsequent calls.
MutableArrayRef<InstrProfValueData> getPromotionCandidatesForInstruction(
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates);
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates,
unsigned MaxNumValueData = 0);
};

} // end namespace llvm
Expand Down
9 changes: 7 additions & 2 deletions llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,14 @@ uint32_t ICallPromotionAnalysis::getProfitablePromotionCandidates(

MutableArrayRef<InstrProfValueData>
ICallPromotionAnalysis::getPromotionCandidatesForInstruction(
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates) {
const Instruction *I, uint64_t &TotalCount, uint32_t &NumCandidates,
unsigned MaxNumValueData) {
// Use the max of the values specified by -icp-max-prom and the provided
// MaxNumValueData parameter.
if (MaxNumPromotions > MaxNumValueData)
MaxNumValueData = MaxNumPromotions;
ValueDataArray = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget,
MaxNumPromotions, TotalCount);
MaxNumValueData, TotalCount);
if (ValueDataArray.empty()) {
NumCandidates = 0;
return MutableArrayRef<InstrProfValueData>();
Expand Down
13 changes: 11 additions & 2 deletions llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,15 @@ static cl::opt<bool> EnableMemProfIndirectCallSupport(
cl::desc(
"Enable MemProf support for summarizing and cloning indirect calls"));

// This can be used to override the number of callees created from VP metadata
// normally taken from the -icp-max-prom option with a larger amount, if useful
// for analysis.
cl::opt<unsigned>
MaxSummaryIndirectEdges("module-summary-max-indirect-edges", cl::init(0),
cl::Hidden,
cl::desc("Max number of summary edges added from "
"indirect call profile metadata"));

LLVM_ABI extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;

extern cl::opt<unsigned> MaxNumVTableAnnotations;
Expand Down Expand Up @@ -494,8 +503,8 @@ static void computeFunctionSummary(
}

CandidateProfileData =
ICallAnalysis.getPromotionCandidatesForInstruction(&I, TotalCount,
NumCandidates);
ICallAnalysis.getPromotionCandidatesForInstruction(
&I, TotalCount, NumCandidates, MaxSummaryIndirectEdges);
for (const auto &Candidate : CandidateProfileData)
CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
.updateHotness(getHotness(Candidate.Count, PSI));
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ cl::opt<bool> MemProfFixupImportant(
"memprof-fixup-important", cl::init(true), cl::Hidden,
cl::desc("Enables edge fixup for important contexts"));

extern cl::opt<unsigned> MaxSummaryIndirectEdges;

} // namespace llvm

namespace {
Expand Down Expand Up @@ -6067,8 +6069,8 @@ unsigned MemProfContextDisambiguation::recordICPInfo(
uint32_t NumCandidates;
uint64_t TotalCount;
auto CandidateProfileData =
ICallAnalysis->getPromotionCandidatesForInstruction(CB, TotalCount,
NumCandidates);
ICallAnalysis->getPromotionCandidatesForInstruction(
CB, TotalCount, NumCandidates, MaxSummaryIndirectEdges);
if (CandidateProfileData.empty())
return 0;

Expand Down
92 changes: 57 additions & 35 deletions llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll
Original file line number Diff line number Diff line change
@@ -1,38 +1,57 @@
; Promote at most one function and annotate at most one vtable.
; As a result, only one value (of each relevant kind) shows up in the function
; summary.
;; Check that the values of -icp-max-num-vtables, -icp-max-prom, and
;; -module-summary-max-indirect-edges affect the number of profiled
;; vtables and virtual functions propagated from the VP metadata to
;; the ThinLTO summary as expected.

;; First try with a max of 1 for both vtables and virtual functions.
; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o

; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s
; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s --check-prefix=SUMMARY

; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS
; Round trip it through llvm-as
;; Round trip it through llvm-as
; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS

; CHECK: <GLOBALVAL_SUMMARY_BLOCK
; CHECK-NEXT: <VERSION op0=
; CHECK-NEXT: <FLAGS op0=0/>
; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
; that loads vtable pointers.
; CHECK-NEXT: <VALUE_GUID {{.*}} op0=21 op1=456547254 op2=3929380924/>
; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the
; indirect call instruction.
; CHECK-NEXT: <VALUE_GUID {{.*}} op0=20 op1=1271117309 op2=2009351347/>
; NOTE vtables and functions from Derived class is dropped because
; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one.
; <PERMODULE_PROFILE> has the format [valueid, flags, instcount, funcflags,
; numrefs, rorefcnt, worefcnt,
; m x valueid,
; n x (valueid, hotness+tailcall)]
; CHECK-NEXT: <PERMODULE_PROFILE {{.*}} op0=0 op1=0 op2=4 op3=256 op4=1 op5=1 op6=0 op7=21 op8=20 op9=3/>
; CHECK-NEXT: </GLOBALVAL_SUMMARY_BLOCK>
;; Next check that a larger -module-summary-max-indirect-edges value overrides
;; -icp-max-prom when determining how many virtual functions to summarize.
; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 -module-summary-max-indirect-edges=2 %s -o %t2.o
; RUN: llvm-bcanalyzer -dump %t2.o | FileCheck %s --check-prefixes=SUMMARY,SUMMARY2
; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefixes=DIS,DIS2

; SUMMARY: <GLOBALVAL_SUMMARY_BLOCK
; SUMMARY-NEXT: <VERSION op0=
; SUMMARY-NEXT: <FLAGS op0=0/>

;; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction
;; that loads vtable pointers.
; SUMMARY-NEXT: <VALUE_GUID {{.*}} op0=[[VTABLEBASE:[0-9]+]] op1=456547254 op2=3929380924/>
;; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the
;; indirect call instruction.
; SUMMARY-NEXT: <VALUE_GUID {{.*}} op0=[[VFUNCBASE:[0-9]+]] op1=1271117309 op2=2009351347/>
;; The `VALUE_GUID` below represents the "_ZN7Derived4funcEv" referenced by the
;; indirect call instruction.
; SUMMARY2-NEXT: <VALUE_GUID {{.*}} op0=[[VFUNCDER:[0-9]+]] op1=1437699922 op2=4037658799/>

;; <PERMODULE_PROFILE> has the format [valueid, flags, instcount, funcflags,
;; numrefs, rorefcnt, worefcnt,
;; m x valueid,
;; n x (valueid, hotness+tailcall)]
;; NOTE vtables and functions from Derived class are dropped in the base case
;; because `-icp-max-num-vtables` and `-icp-max-prom` are both set to one.
; SUMMARY-NEXT: <PERMODULE_PROFILE {{.*}} op0=0 op1=0 op2=4 op3=256 op4=1 op5=1 op6=0 op7=[[VTABLEBASE]] op8=[[VFUNCBASE]] op9=3
;; With -module-summary-max-indirect-edges=2 we do get the Derived class
;; function in the summary.
; SUMMARY2-SAME: op10=[[VFUNCDER]] op11=2
;; We should have no other ops before the end of the summary record.
; SUMMARY-NOT: op
; SUMMARY-SAME: />
; SUMMARY-NEXT: </GLOBALVAL_SUMMARY_BLOCK>

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function has one BB and an entry count of 150, so the BB is hot according to
; ProfileSummary and reflected so in the bitcode (see llvm-dis output).
;; Function has one BB and an entry count of 150, so the BB is hot according to
;; ProfileSummary and reflected so in the bitcode (see llvm-dis output).
define i32 @_Z4testP4Base(ptr %0) !prof !15 {
%2 = load ptr, ptr %0, !prof !16
%3 = load ptr, ptr %2
Expand All @@ -58,17 +77,20 @@ define i32 @_Z4testP4Base(ptr %0) !prof !15 {
!14 = !{i32 999999, i64 1, i32 2}

!15 = !{!"function_entry_count", i32 150}
; 1960855528937986108 is the MD5 hash of _ZTV4Base, and
; 13870436605473471591 is the MD5 hash of _ZTV7Derived
;; 1960855528937986108 is the MD5 hash of _ZTV4Base, and
;; 13870436605473471591 is the MD5 hash of _ZTV7Derived
!16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50}
; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and
; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv
;; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and
;; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv
!17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50}

; ModuleSummaryIndex stores <guid, global-value summary> map in std::map; so
; global value summares are printed out in the order that gv's guid increases.
; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0))
; DIS: ^1 = gv: (guid: 1960855528937986108)
; DIS: ^2 = gv: (guid: 5459407273543877811)
; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0, importType: definition), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965
; DIS: ^4 = blockcount: 0
;; ModuleSummaryIndex stores <guid, global-value summary> map in std::map; so
;; global value summares are printed out in the order that gv's guid increases.
; DIS: ^[[VTABLEBASE2:[0-9]+]] = gv: (guid: 1960855528937986108)
; DIS: ^[[VFUNCBASE2:[0-9]+]] = gv: (guid: 5459407273543877811)
; DIS2: ^[[VFUNCDER2:[0-9]+]] = gv: (guid: 6174874150489409711)
; DIS: gv: (name: "_Z4testP4Base", {{.*}} calls: ((callee: ^[[VFUNCBASE2]], hotness: hot)
;; With -module-summary-max-indirect-edges=2 we get the Derived func.
; DIS2-SAME: (callee: ^[[VFUNCDER2]], hotness: none)
; DIS-NOT: callee
; DIS-SAME: ), refs: (readonly ^[[VTABLEBASE2]])
8 changes: 7 additions & 1 deletion llvm/test/ThinLTO/X86/memprof-icp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,9 @@
; RUN: split-file %s %t

; RUN: opt -thinlto-bc %t/main.ll >%t/main.o
; RUN: opt -thinlto-bc %t/foo.ll >%t/foo.o
;; Check that -module-summary-max-indirect-edges correctly overrides
;; -icp-max-prom with a higher max when building summary.
; RUN: opt -thinlto-bc -icp-max-prom=1 -module-summary-max-indirect-edges=2 %t/foo.ll >%t/foo.o

;; Check that we get the synthesized callsite records. There should be 2, one
;; for each profiled target in the VP metadata. They will have the same stackIds
Expand All @@ -86,6 +88,10 @@
;; First perform in-process ThinLTO
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
; RUN: -enable-memprof-indirect-call-support=true \
;; Check that -module-summary-max-indirect-edges correctly overrides
;; -icp-max-prom with a higher max when performing memprof ICP.
; RUN: -icp-max-prom=1 \
; RUN: -module-summary-max-indirect-edges=2 \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \
; RUN: -r=%t/foo.o,_ZN2B03barEj.abc,plx \
Expand Down