Skip to content

Commit ce2b3ce

Browse files
authored
[BOLT] Improve profile quality reporting (#130810)
Improve profile quality reporting by 1) fixing a format issue for small binaries, 2) adding new stats for exception handling usage, and 3) excluding selected blocks when computing the CFG flow conservation score. More specifically, for 3) we exclude blocks that satisfy at least one of the following conditions: a) the block is a landing pad, b) the block has at least one landing pad with a non-zero execution count, c) the block ends with a recursive call. The reason for a) and b) is that the thrower --> landing pad edges are not explicitly represented in the CFG. The reason for c) is that the call-continuation fallthrough edge count is not important in the case of recursive calls. Modified test `bolt/test/X86/profile-quality-reporting.test`. Added test `bolt/test/X86/profile-quality-reporting-small-binary.s`.
1 parent d6622df commit ce2b3ce

File tree

3 files changed

+233
-91
lines changed

3 files changed

+233
-91
lines changed

Diff for: bolt/lib/Passes/ProfileQualityStats.cpp

+197-90
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,16 @@ struct FlowInfo {
5252
FunctionFlowMapTy CallGraphIncomingFlows;
5353
};
5454

55+
// When reporting exception handling stats, we only consider functions with
56+
// more than MinLPECSum counts in landing pads to avoid false positives due to
57+
// sampling noise.
58+
const uint16_t MinLPECSum = 50;
59+
60+
// When reporting CFG flow conservation stats, we only consider blocks with
61+
// execution counts > MinBlockCount when reporting the distribution of worst
62+
// gaps.
63+
const uint16_t MinBlockCount = 500;
64+
5565
template <typename T>
5666
void printDistribution(raw_ostream &OS, std::vector<T> &values,
5767
bool Fraction = false) {
@@ -91,8 +101,12 @@ void printCFGContinuityStats(raw_ostream &OS,
91101
std::vector<double> FractionECUnreachables;
92102

93103
for (const BinaryFunction *Function : Functions) {
94-
if (Function->size() <= 1)
104+
if (Function->size() <= 1) {
105+
NumUnreachables.push_back(0);
106+
SumECUnreachables.push_back(0);
107+
FractionECUnreachables.push_back(0.0);
95108
continue;
109+
}
96110

97111
// Compute the sum of all BB execution counts (ECs).
98112
size_t NumPosECBBs = 0;
@@ -142,8 +156,10 @@ void printCFGContinuityStats(raw_ostream &OS,
142156
const size_t NumPosECBBsUnreachableFromEntry =
143157
NumPosECBBs - NumReachableBBs;
144158
const size_t SumUnreachableBBEC = SumAllBBEC - SumReachableBBEC;
145-
const double FractionECUnreachable =
146-
(double)SumUnreachableBBEC / SumAllBBEC;
159+
160+
double FractionECUnreachable = 0.0;
161+
if (SumAllBBEC > 0)
162+
FractionECUnreachable = (double)SumUnreachableBBEC / SumAllBBEC;
147163

148164
if (opts::Verbosity >= 2 && FractionECUnreachable >= 0.05) {
149165
OS << "Non-trivial CFG discontinuity observed in function "
@@ -157,9 +173,6 @@ void printCFGContinuityStats(raw_ostream &OS,
157173
FractionECUnreachables.push_back(FractionECUnreachable);
158174
}
159175

160-
if (FractionECUnreachables.empty())
161-
return;
162-
163176
llvm::sort(FractionECUnreachables);
164177
const int Rank = int(FractionECUnreachables.size() *
165178
opts::PercentileForProfileQualityCheck / 100);
@@ -187,8 +200,10 @@ void printCallGraphFlowConservationStats(
187200
std::vector<double> CallGraphGaps;
188201

189202
for (const BinaryFunction *Function : Functions) {
190-
if (Function->size() <= 1 || !Function->isSimple())
203+
if (Function->size() <= 1 || !Function->isSimple()) {
204+
CallGraphGaps.push_back(0.0);
191205
continue;
206+
}
192207

193208
const uint64_t FunctionNum = Function->getFunctionNumber();
194209
std::vector<uint64_t> &IncomingFlows =
@@ -199,60 +214,63 @@ void printCallGraphFlowConservationStats(
199214
TotalFlowMap.CallGraphIncomingFlows;
200215

201216
// Only consider functions that are not a program entry.
202-
if (CallGraphIncomingFlows.find(FunctionNum) !=
217+
if (CallGraphIncomingFlows.find(FunctionNum) ==
203218
CallGraphIncomingFlows.end()) {
204-
uint64_t EntryInflow = 0;
205-
uint64_t EntryOutflow = 0;
206-
uint32_t NumConsideredEntryBlocks = 0;
207-
208-
Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) {
209-
const BinaryBasicBlock *EntryBB =
210-
Function->getBasicBlockAtOffset(Offset);
211-
if (!EntryBB || EntryBB->succ_size() == 0)
212-
return true;
213-
NumConsideredEntryBlocks++;
214-
EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()];
215-
EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()];
219+
CallGraphGaps.push_back(0.0);
220+
continue;
221+
}
222+
223+
uint64_t EntryInflow = 0;
224+
uint64_t EntryOutflow = 0;
225+
uint32_t NumConsideredEntryBlocks = 0;
226+
227+
Function->forEachEntryPoint([&](uint64_t Offset, const MCSymbol *Label) {
228+
const BinaryBasicBlock *EntryBB = Function->getBasicBlockAtOffset(Offset);
229+
if (!EntryBB || EntryBB->succ_size() == 0)
216230
return true;
217-
});
218-
219-
uint64_t NetEntryOutflow = 0;
220-
if (EntryOutflow < EntryInflow) {
221-
if (opts::Verbosity >= 2) {
222-
// We expect entry blocks' CFG outflow >= inflow, i.e., it has a
223-
// non-negative net outflow. If this is not the case, then raise a
224-
// warning if requested.
225-
OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow "
226-
"in function "
227-
<< Function->getPrintName() << "\n";
228-
if (opts::Verbosity >= 3)
229-
Function->dump();
230-
}
231-
} else {
232-
NetEntryOutflow = EntryOutflow - EntryInflow;
233-
}
234-
if (NumConsideredEntryBlocks > 0) {
235-
const uint64_t CallGraphInflow =
236-
TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()];
237-
const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow);
238-
const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow);
239-
const double CallGraphGap = 1 - (double)Min / Max;
240-
241-
if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) {
242-
OS << "Nontrivial call graph gap of size "
243-
<< formatv("{0:P}", CallGraphGap) << " observed in function "
244-
<< Function->getPrintName() << "\n";
245-
if (opts::Verbosity >= 3)
246-
Function->dump();
247-
}
231+
NumConsideredEntryBlocks++;
232+
EntryInflow += IncomingFlows[EntryBB->getLayoutIndex()];
233+
EntryOutflow += OutgoingFlows[EntryBB->getLayoutIndex()];
234+
return true;
235+
});
248236

249-
CallGraphGaps.push_back(CallGraphGap);
237+
uint64_t NetEntryOutflow = 0;
238+
if (EntryOutflow < EntryInflow) {
239+
if (opts::Verbosity >= 2) {
240+
// We expect entry blocks' CFG outflow >= inflow, i.e., it has a
241+
// non-negative net outflow. If this is not the case, then raise a
242+
// warning if requested.
243+
OS << "BOLT WARNING: unexpected entry block CFG outflow < inflow "
244+
"in function "
245+
<< Function->getPrintName() << "\n";
246+
if (opts::Verbosity >= 3)
247+
Function->dump();
250248
}
249+
} else {
250+
NetEntryOutflow = EntryOutflow - EntryInflow;
251251
}
252-
}
252+
if (NumConsideredEntryBlocks > 0) {
253+
const uint64_t CallGraphInflow =
254+
TotalFlowMap.CallGraphIncomingFlows[Function->getFunctionNumber()];
255+
const uint64_t Min = std::min(NetEntryOutflow, CallGraphInflow);
256+
const uint64_t Max = std::max(NetEntryOutflow, CallGraphInflow);
257+
double CallGraphGap = 0.0;
258+
if (Max > 0)
259+
CallGraphGap = 1 - (double)Min / Max;
260+
261+
if (opts::Verbosity >= 2 && CallGraphGap >= 0.5) {
262+
OS << "Non-trivial call graph gap of size "
263+
<< formatv("{0:P}", CallGraphGap) << " observed in function "
264+
<< Function->getPrintName() << "\n";
265+
if (opts::Verbosity >= 3)
266+
Function->dump();
267+
}
253268

254-
if (CallGraphGaps.empty())
255-
return;
269+
CallGraphGaps.push_back(CallGraphGap);
270+
} else {
271+
CallGraphGaps.push_back(0.0);
272+
}
273+
}
256274

257275
llvm::sort(CallGraphGaps);
258276
const int Rank =
@@ -265,18 +283,19 @@ void printCallGraphFlowConservationStats(
265283
}
266284
}
267285

268-
void printCFGFlowConservationStats(raw_ostream &OS,
286+
void printCFGFlowConservationStats(const BinaryContext &BC, raw_ostream &OS,
269287
iterator_range<function_iterator> &Functions,
270288
FlowInfo &TotalFlowMap) {
271289
std::vector<double> CFGGapsWeightedAvg;
272290
std::vector<double> CFGGapsWorst;
273291
std::vector<uint64_t> CFGGapsWorstAbs;
274-
// We only consider blocks with execution counts > MinBlockCount when
275-
// reporting the distribution of worst gaps.
276-
const uint16_t MinBlockCount = 500;
277292
for (const BinaryFunction *Function : Functions) {
278-
if (Function->size() <= 1 || !Function->isSimple())
293+
if (Function->size() <= 1 || !Function->isSimple()) {
294+
CFGGapsWeightedAvg.push_back(0.0);
295+
CFGGapsWorst.push_back(0.0);
296+
CFGGapsWorstAbs.push_back(0);
279297
continue;
298+
}
280299

281300
const uint64_t FunctionNum = Function->getFunctionNumber();
282301
std::vector<uint64_t> &MaxCountMaps =
@@ -295,12 +314,34 @@ void printCFGFlowConservationStats(raw_ostream &OS,
295314
if (BB.isEntryPoint() || BB.succ_size() == 0)
296315
continue;
297316

317+
if (BB.getKnownExecutionCount() == 0 || BB.getNumNonPseudos() == 0)
318+
continue;
319+
320+
// We don't consider blocks that is a landing pad or has a
321+
// positive-execution-count landing pad
322+
if (BB.isLandingPad())
323+
continue;
324+
325+
if (llvm::any_of(BB.landing_pads(),
326+
std::mem_fn(&BinaryBasicBlock::getKnownExecutionCount)))
327+
continue;
328+
329+
// We don't consider blocks that end with a recursive call instruction
330+
const MCInst *Inst = BB.getLastNonPseudoInstr();
331+
if (BC.MIB->isCall(*Inst)) {
332+
const MCSymbol *DstSym = BC.MIB->getTargetSymbol(*Inst);
333+
const BinaryFunction *DstFunc =
334+
DstSym ? BC.getFunctionForSymbol(DstSym) : nullptr;
335+
if (DstFunc == Function)
336+
continue;
337+
}
338+
298339
const uint64_t Max = MaxCountMaps[BB.getLayoutIndex()];
299340
const uint64_t Min = MinCountMaps[BB.getLayoutIndex()];
300-
const double Gap = 1 - (double)Min / Max;
341+
double Gap = 0.0;
342+
if (Max > 0)
343+
Gap = 1 - (double)Min / Max;
301344
double Weight = BB.getKnownExecutionCount() * BB.getNumNonPseudos();
302-
if (Weight == 0)
303-
continue;
304345
// We use log to prevent the stats from being dominated by extremely hot
305346
// blocks
306347
Weight = log(Weight);
@@ -316,39 +357,36 @@ void printCFGFlowConservationStats(raw_ostream &OS,
316357
BBWorstGapAbs = &BB;
317358
}
318359
}
319-
if (WeightSum > 0) {
320-
const double WeightedGap = WeightedGapSum / WeightSum;
321-
if (opts::Verbosity >= 2 && (WeightedGap >= 0.1 || WorstGap >= 0.9)) {
322-
OS << "Nontrivial CFG gap observed in function "
323-
<< Function->getPrintName() << "\n"
324-
<< "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n";
325-
if (BBWorstGap)
326-
OS << "Worst gap: " << formatv("{0:P}", WorstGap)
327-
<< " at BB with input offset: 0x"
328-
<< Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
329-
if (BBWorstGapAbs)
330-
OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
331-
<< "input offset 0x"
332-
<< Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
333-
if (opts::Verbosity >= 3)
334-
Function->dump();
335-
}
336-
337-
CFGGapsWeightedAvg.push_back(WeightedGap);
338-
CFGGapsWorst.push_back(WorstGap);
339-
CFGGapsWorstAbs.push_back(WorstGapAbs);
360+
double WeightedGap = WeightedGapSum;
361+
if (WeightSum > 0)
362+
WeightedGap /= WeightSum;
363+
if (opts::Verbosity >= 2 && WorstGap >= 0.9) {
364+
OS << "Non-trivial CFG gap observed in function "
365+
<< Function->getPrintName() << "\n"
366+
<< "Weighted gap: " << formatv("{0:P}", WeightedGap) << "\n";
367+
if (BBWorstGap)
368+
OS << "Worst gap: " << formatv("{0:P}", WorstGap)
369+
<< " at BB with input offset: 0x"
370+
<< Twine::utohexstr(BBWorstGap->getInputOffset()) << "\n";
371+
if (BBWorstGapAbs)
372+
OS << "Worst gap (absolute value): " << WorstGapAbs << " at BB with "
373+
<< "input offset 0x"
374+
<< Twine::utohexstr(BBWorstGapAbs->getInputOffset()) << "\n";
375+
if (opts::Verbosity >= 3)
376+
Function->dump();
340377
}
378+
CFGGapsWeightedAvg.push_back(WeightedGap);
379+
CFGGapsWorst.push_back(WorstGap);
380+
CFGGapsWorstAbs.push_back(WorstGapAbs);
341381
}
342382

343-
if (CFGGapsWeightedAvg.empty())
344-
return;
345383
llvm::sort(CFGGapsWeightedAvg);
346384
const int RankWA = int(CFGGapsWeightedAvg.size() *
347385
opts::PercentileForProfileQualityCheck / 100);
348386
llvm::sort(CFGGapsWorst);
349387
const int RankW =
350388
int(CFGGapsWorst.size() * opts::PercentileForProfileQualityCheck / 100);
351-
OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst)\n",
389+
OS << formatv("CFG flow conservation gap {0:P} (weighted) {1:P} (worst); ",
352390
CFGGapsWeightedAvg[RankWA], CFGGapsWorst[RankW]);
353391
if (opts::Verbosity >= 1) {
354392
OS << "distribution of weighted CFG flow conservation gaps\n";
@@ -365,6 +403,74 @@ void printCFGFlowConservationStats(raw_ostream &OS,
365403
}
366404
}
367405

406+
void printExceptionHandlingStats(const BinaryContext &BC, raw_ostream &OS,
407+
iterator_range<function_iterator> &Functions) {
408+
std::vector<double> LPCountFractionsOfTotalBBEC;
409+
std::vector<double> LPCountFractionsOfTotalInvokeEC;
410+
for (const BinaryFunction *Function : Functions) {
411+
size_t LPECSum = 0;
412+
size_t BBECSum = 0;
413+
size_t InvokeECSum = 0;
414+
for (BinaryBasicBlock &BB : *Function) {
415+
const size_t BBEC = BB.getKnownExecutionCount();
416+
BBECSum += BBEC;
417+
if (BB.isLandingPad())
418+
LPECSum += BBEC;
419+
for (const MCInst &Inst : BB) {
420+
if (!BC.MIB->isInvoke(Inst))
421+
continue;
422+
const std::optional<MCPlus::MCLandingPad> EHInfo =
423+
BC.MIB->getEHInfo(Inst);
424+
if (EHInfo->first)
425+
InvokeECSum += BBEC;
426+
}
427+
}
428+
429+
if (LPECSum <= MinLPECSum) {
430+
LPCountFractionsOfTotalBBEC.push_back(0.0);
431+
LPCountFractionsOfTotalInvokeEC.push_back(0.0);
432+
continue;
433+
}
434+
double FracTotalBBEC = 0.0;
435+
if (BBECSum > 0)
436+
FracTotalBBEC = (double)LPECSum / BBECSum;
437+
double FracTotalInvokeEC = 0.0;
438+
if (InvokeECSum > 0)
439+
FracTotalInvokeEC = (double)LPECSum / InvokeECSum;
440+
LPCountFractionsOfTotalBBEC.push_back(FracTotalBBEC);
441+
LPCountFractionsOfTotalInvokeEC.push_back(FracTotalInvokeEC);
442+
443+
if (opts::Verbosity >= 2 && FracTotalInvokeEC >= 0.05) {
444+
OS << "Non-trivial usage of exception handling observed in function "
445+
<< Function->getPrintName() << "\n"
446+
<< formatv(
447+
"Fraction of total InvokeEC that goes to landing pads: {0:P}\n",
448+
FracTotalInvokeEC);
449+
if (opts::Verbosity >= 3)
450+
Function->dump();
451+
}
452+
}
453+
454+
llvm::sort(LPCountFractionsOfTotalBBEC);
455+
const int RankBBEC = int(LPCountFractionsOfTotalBBEC.size() *
456+
opts::PercentileForProfileQualityCheck / 100);
457+
llvm::sort(LPCountFractionsOfTotalInvokeEC);
458+
const int RankInvoke = int(LPCountFractionsOfTotalInvokeEC.size() *
459+
opts::PercentileForProfileQualityCheck / 100);
460+
OS << formatv("exception handling usage {0:P} (of total BBEC) {1:P} (of "
461+
"total InvokeEC)\n",
462+
LPCountFractionsOfTotalBBEC[RankBBEC],
463+
LPCountFractionsOfTotalInvokeEC[RankInvoke]);
464+
if (opts::Verbosity >= 1) {
465+
OS << "distribution of exception handling usage as a fraction of total "
466+
"BBEC of each function\n";
467+
printDistribution(OS, LPCountFractionsOfTotalBBEC, /*Fraction=*/true);
468+
OS << "distribution of exception handling usage as a fraction of total "
469+
"InvokeEC of each function\n";
470+
printDistribution(OS, LPCountFractionsOfTotalInvokeEC, /*Fraction=*/true);
471+
}
472+
}
473+
368474
void computeFlowMappings(const BinaryContext &BC, FlowInfo &TotalFlowMap) {
369475
// Increment block inflow and outflow with CFG jump counts.
370476
TotalFlowMapTy &TotalIncomingFlows = TotalFlowMap.TotalIncomingFlows;
@@ -519,8 +625,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
519625
100 - opts::PercentileForProfileQualityCheck);
520626
printCFGContinuityStats(BC.outs(), Functions);
521627
printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
522-
printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
523-
628+
printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap);
629+
printExceptionHandlingStats(BC, BC.outs(), Functions);
524630
// Print more detailed bucketed stats if requested.
525631
if (opts::Verbosity >= 1 && RealNumTopFunctions >= 5) {
526632
const size_t PerBucketSize = RealNumTopFunctions / 5;
@@ -550,7 +656,8 @@ void printAll(BinaryContext &BC, FunctionListType &ValidFunctions,
550656
MaxFunctionExecutionCount);
551657
printCFGContinuityStats(BC.outs(), Functions);
552658
printCallGraphFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
553-
printCFGFlowConservationStats(BC.outs(), Functions, TotalFlowMap);
659+
printCFGFlowConservationStats(BC, BC.outs(), Functions, TotalFlowMap);
660+
printExceptionHandlingStats(BC, BC.outs(), Functions);
554661
}
555662
}
556663
}

0 commit comments

Comments
 (0)