Skip to content

Commit

Permalink
Merge pull request #457 from Xilinx/bump_to_2e43a304
Browse files Browse the repository at this point in the history
[AutoBump] Merge with 2e43a30 (Oct 25) (1)
  • Loading branch information
jorickert authored Feb 3, 2025
2 parents 47a1830 + 0a6b32b commit e8be3be
Show file tree
Hide file tree
Showing 3,408 changed files with 366,239 additions and 25,369 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
6 changes: 6 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,9 @@ class BinaryFunction {
/// Raw branch count for this function in the profile.
uint64_t RawBranchCount{0};

/// Dynamically executed function bytes, used for density computation.
uint64_t SampleCountInBytes{0};

/// Indicates the type of profile the function is using.
uint16_t ProfileFlags{PF_NONE};

Expand Down Expand Up @@ -1844,6 +1847,9 @@ class BinaryFunction {
/// to this function.
void setRawBranchCount(uint64_t Count) { RawBranchCount = Count; }

/// Number of bytes dynamically executed in this function, as derived from
/// raw perf data.
uint64_t getSampleCountInBytes() const {
  return SampleCountInBytes;
}

/// Return the execution count for functions with known profile.
/// Return 0 if the function has no profile.
uint64_t getKnownExecutionCount() const {
Expand Down
1 change: 1 addition & 0 deletions bolt/include/bolt/Utils/CommandLineOpts.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ extern llvm::cl::opt<bool> PrintSections;
enum ProfileFormatKind { PF_Fdata, PF_YAML };

extern llvm::cl::opt<ProfileFormatKind> ProfileFormat;
extern llvm::cl::opt<bool> ShowDensity;
extern llvm::cl::opt<bool> SplitEH;
extern llvm::cl::opt<bool> StrictMode;
extern llvm::cl::opt<bool> TimeOpts;
Expand Down
3 changes: 3 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2577,6 +2577,7 @@ struct CFISnapshot {
case MCCFIInstruction::OpAdjustCfaOffset:
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpNegateRAStateWithPC:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
Expand Down Expand Up @@ -2715,6 +2716,7 @@ struct CFISnapshotDiff : public CFISnapshot {
case MCCFIInstruction::OpAdjustCfaOffset:
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpNegateRAStateWithPC:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
Expand Down Expand Up @@ -2864,6 +2866,7 @@ BinaryFunction::unwindCFIState(int32_t FromState, int32_t ToState,
case MCCFIInstruction::OpAdjustCfaOffset:
case MCCFIInstruction::OpWindowSave:
case MCCFIInstruction::OpNegateRAState:
case MCCFIInstruction::OpNegateRAStateWithPC:
case MCCFIInstruction::OpLLVMDefAspaceCfa:
case MCCFIInstruction::OpLabel:
llvm_unreachable("unsupported CFI opcode");
Expand Down
74 changes: 74 additions & 0 deletions bolt/lib/Passes/BinaryPasses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Passes/ReorderAlgorithm.h"
#include "bolt/Passes/ReorderFunctions.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/Support/CommandLine.h"
#include <atomic>
#include <mutex>
Expand Down Expand Up @@ -223,6 +224,18 @@ static cl::opt<unsigned> TopCalledLimit(
"functions section"),
cl::init(100), cl::Hidden, cl::cat(BoltCategory));

// Profile density options, synced with llvm-profgen/ProfileGenerator.cpp
//
// Share of the total sample count that the densest functions have to cover
// when profile density is computed. The value is expressed in millionths,
// i.e. 1000000 corresponds to 100%.
static cl::opt<int> ProfileDensityCutOffHot(
    "profile-density-cutoff-hot",
    cl::desc("Total samples cutoff for functions used to calculate "
             "profile density."),
    cl::init(990000));

// Lowest profile density considered sufficient; when the computed density is
// below this value a warning suggests collecting more samples.
static cl::opt<double> ProfileDensityThreshold(
    "profile-density-threshold",
    cl::desc("If the profile density is below the given threshold, it "
             "will be suggested to increase the sampling rate."),
    cl::Optional, cl::init(60));

} // namespace opts

namespace llvm {
Expand Down Expand Up @@ -1383,6 +1396,7 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
uint64_t StaleSampleCount = 0;
uint64_t InferredSampleCount = 0;
std::vector<const BinaryFunction *> ProfiledFunctions;
std::vector<std::pair<double, uint64_t>> FuncDensityList;
const char *StaleFuncsHeader = "BOLT-INFO: Functions with stale profile:\n";
for (auto &BFI : BC.getBinaryFunctions()) {
const BinaryFunction &Function = BFI.second;
Expand Down Expand Up @@ -1441,6 +1455,22 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
StaleSampleCount += SampleCount;
++NumAllStaleFunctions;
}

if (opts::ShowDensity) {
  // Executed traces of BOLT split functions registered in BAT are attributed
  // to the main fragment, so for a hot parent the size has to cover every
  // fragment, not only the function itself.
  uint64_t Size = Function.getSize();
  if (IsHotParentOfBOLTSplitFunction) {
    for (const BinaryFunction *Fragment : Function.getFragments())
      Size += Fragment->getSize();
  }

  // Density is the number of executed bytes per byte of function size.
  const uint64_t ExecutedBytes = Function.getSampleCountInBytes();
  const double Density = static_cast<double>(ExecutedBytes) / Size;

  // Remember the (density, sample count) pair for the summary computed
  // after all functions have been visited.
  FuncDensityList.emplace_back(Density, SampleCount);
  LLVM_DEBUG(BC.outs() << Function << ": executed bytes " << ExecutedBytes
                       << ", size (b) " << Size << ", density " << Density
                       << ", sample count " << SampleCount << '\n');
}
}
BC.NumProfiledFuncs = ProfiledFunctions.size();
BC.NumStaleProfileFuncs = NumStaleProfileFunctions;
Expand Down Expand Up @@ -1684,6 +1714,50 @@ Error PrintProgramStats::runOnFunctions(BinaryContext &BC) {
BC.outs() << ". Use -print-unknown to see the list.";
BC.outs() << '\n';
}

if (opts::ShowDensity) {
  using DensityEntry = std::pair<double, uint64_t>;

  // Densest functions first; entries with equal density are ordered by
  // ascending sample count.
  llvm::stable_sort(FuncDensityList,
                    [](const DensityEntry &LHS, const DensityEntry &RHS) {
                      if (LHS.first == RHS.first)
                        return LHS.second < RHS.second;
                      return LHS.first > RHS.first;
                    });

  assert(opts::ProfileDensityCutOffHot <= 1000000 &&
         "The cutoff value is greater than 1000000(100%)");

  // Consume entries in sorted order until the accumulated samples reach the
  // requested share of the total sample count. Density tracks the density of
  // the last entry consumed and keeps its initial 0.0 if nothing is consumed.
  double Density = 0.0;
  uint64_t AccumulatedSamples = 0;
  for (const auto &[FuncDensity, FuncSamples] : FuncDensityList) {
    const bool CutoffReached =
        !(AccumulatedSamples <
          TotalSampleCount *
              static_cast<float>(opts::ProfileDensityCutOffHot) / 1000000);
    if (CutoffReached)
      break;
    AccumulatedSamples += FuncSamples;
    Density = FuncDensity;
  }

  const double Threshold = opts::ProfileDensityThreshold;
  if (Density == 0.0) {
    BC.errs() << "BOLT-WARNING: the output profile is empty or the "
                 "--profile-density-cutoff-hot option is "
                 "set too low. Please check your command.\n";
  } else if (Density < Threshold) {
    // The ratio threshold/density is reported as the estimated factor of
    // additional samples.
    BC.errs()
        << "BOLT-WARNING: BOLT is estimated to optimize better with "
        << format("%.1f", Threshold / Density)
        << "x more samples. Please consider increasing sampling rate or "
           "profiling for longer duration to get more samples.\n";
  }

  // The cutoff option is in millionths; dividing by 10000 yields a percentage.
  const double CutoffPercent =
      static_cast<double>(opts::ProfileDensityCutOffHot) / 10000;
  BC.outs() << "BOLT-INFO: Functions with density >= "
            << format("%.1f", Density) << " account for "
            << format("%.2f", CutoffPercent) << "% total sample counts.\n";
}
return Error::success();
}

Expand Down
15 changes: 12 additions & 3 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,8 +638,12 @@ void DataAggregator::processProfile(BinaryContext &BC) {
: BinaryFunction::PF_LBR;
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (getBranchData(BF) || getFuncSampleData(BF.getNames()))
FuncBranchData *FBD = getBranchData(BF);
if (FBD || getFuncSampleData(BF.getNames())) {
BF.markProfiled(Flags);
if (FBD)
BF.RawBranchCount = FBD->getNumExecutedBranches();
}
}

for (auto &FuncBranches : NamesToBranches)
Expand Down Expand Up @@ -845,6 +849,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
return false;
}

// Set ParentFunc to BAT parent function or FromFunc itself.
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
if (!ParentFunc)
ParentFunc = FromFunc;
ParentFunc->SampleCountInBytes += Count * (Second.From - First.To);

std::optional<BoltAddressTranslation::FallthroughListTy> FTs =
BAT ? BAT->getFallthroughsInTrace(FromFunc->getAddress(), First.To,
Second.From)
Expand All @@ -864,13 +874,12 @@ bool DataAggregator::doTrace(const LBREntry &First, const LBREntry &Second,
<< FromFunc->getPrintName() << ":"
<< Twine::utohexstr(First.To) << " to "
<< Twine::utohexstr(Second.From) << ".\n");
BinaryFunction *ParentFunc = getBATParentFunction(*FromFunc);
for (auto [From, To] : *FTs) {
if (BAT) {
From = BAT->translate(FromFunc->getAddress(), From, /*IsBranchSrc=*/true);
To = BAT->translate(FromFunc->getAddress(), To, /*IsBranchSrc=*/false);
}
doIntraBranch(ParentFunc ? *ParentFunc : *FromFunc, From, To, Count, false);
doIntraBranch(*ParentFunc, From, To, Count, false);
}

return true;
Expand Down
4 changes: 4 additions & 0 deletions bolt/lib/Utils/CommandLineOpts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ cl::opt<std::string> SaveProfile("w",
cl::desc("save recorded profile to a file"),
cl::cat(BoltOutputCategory));

cl::opt<bool> ShowDensity("show-density",
cl::desc("show profile density details"),
cl::Optional, cl::cat(AggregatorCategory));

cl::opt<bool> SplitEH("split-eh", cl::desc("split C++ exception handling code"),
cl::Hidden, cl::cat(BoltOptCategory));

Expand Down
16 changes: 15 additions & 1 deletion bolt/test/X86/pre-aggregated-perf.test
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,21 @@ REQUIRES: system-linux

RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
RUN: --profile-use-dfs | FileCheck %s
RUN: --show-density \
RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B

CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.

RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
RUN: --show-density \
RUN: --profile-density-cutoff-hot=970000 \
RUN: --profile-use-dfs 2>&1 | FileCheck %s --check-prefix=CHECK-WARNING

CHECK-WARNING: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
CHECK-WARNING: BOLT-WARNING: BOLT is estimated to optimize better with 2.8x more samples.
CHECK-WARNING: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.

RUN: llvm-bolt %t.exe -data %t -o %t.null | FileCheck %s
RUN: llvm-bolt %t.exe -data %t.new -o %t.null | FileCheck %s
Expand Down
1 change: 1 addition & 0 deletions bolt/tools/driver/llvm-bolt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ void perf2boltMode(int argc, char **argv) {
exit(1);
}
opts::AggregateOnly = true;
opts::ShowDensity = true;
}

void boltDiffMode(int argc, char **argv) {
Expand Down
1 change: 0 additions & 1 deletion clang-tools-extra/clang-tidy/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ clang_target_link_libraries(clangTidy
clangFrontend
clangLex
clangRewrite
clangSema
clangSerialization
clangTooling
clangToolingCore
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/ClangTidy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ class ClangTidyASTConsumer : public MultiplexConsumer {
std::unique_ptr<ClangTidyProfiling> Profiling;
std::unique_ptr<ast_matchers::MatchFinder> Finder;
std::vector<std::unique_ptr<ClangTidyCheck>> Checks;
// Overrides the virtual anchor() inherited from the base class with an empty
// body. No trailing ';' after the body: it is redundant and triggers
// -Wextra-semi.
void anchor() override {}
};

} // namespace
Expand Down
3 changes: 3 additions & 0 deletions clang-tools-extra/clang-tidy/bugprone/BugproneTidyModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#include "MultipleStatementMacroCheck.h"
#include "NoEscapeCheck.h"
#include "NonZeroEnumToBoolConversionCheck.h"
#include "NondeterministicPointerIterationOrderCheck.h"
#include "NotNullTerminatedResultCheck.h"
#include "OptionalValueConversionCheck.h"
#include "ParentVirtualCallCheck.h"
Expand Down Expand Up @@ -174,6 +175,8 @@ class BugproneModule : public ClangTidyModule {
"bugprone-multiple-new-in-one-expression");
CheckFactories.registerCheck<MultipleStatementMacroCheck>(
"bugprone-multiple-statement-macro");
CheckFactories.registerCheck<NondeterministicPointerIterationOrderCheck>(
"bugprone-nondeterministic-pointer-iteration-order");
CheckFactories.registerCheck<OptionalValueConversionCheck>(
"bugprone-optional-value-conversion");
CheckFactories.registerCheck<PointerArithmeticOnPolymorphicObjectCheck>(
Expand Down
1 change: 1 addition & 0 deletions clang-tools-extra/clang-tidy/bugprone/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ add_clang_library(clangTidyBugproneModule STATIC
MultipleNewInOneExpressionCheck.cpp
MultipleStatementMacroCheck.cpp
NoEscapeCheck.cpp
NondeterministicPointerIterationOrderCheck.cpp
NonZeroEnumToBoolConversionCheck.cpp
NotNullTerminatedResultCheck.cpp
OptionalValueConversionCheck.cpp
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//===----- NondeterministicPointerIterationOrderCheck.cpp - clang-tidy ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "NondeterministicPointerIterationOrderCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/Lex/Lexer.h"

using namespace clang::ast_matchers;

namespace clang::tidy::bugprone {

/// Registers the two AST matchers used by this check:
///  - "cxxForRangeStmt": a range-based for loop whose loop variable is a
///    reference or pointer and whose range initializer names a variable of
///    one of the std unordered containers (bound as "rangeinit", with the
///    container record bound as "recorddecl");
///  - "sortsemantic": a call to one of the listed std ordering/partitioning
///    algorithms whose first argument is a member call on an object whose
///    record type has a field pointing to a pointer type.
void NondeterministicPointerIterationOrderCheck::registerMatchers(
    MatchFinder *Finder) {
  // --- Range-based for loops over unordered containers ---------------------
  auto PointerOrRefLoopVar = varDecl(hasType(
      qualType(hasCanonicalType(anyOf(referenceType(), pointerType())))));

  auto UnorderedContainerVarRef = declRefExpr(to(varDecl(
      hasType(recordDecl(hasAnyName("std::unordered_set", "std::unordered_map",
                                    "std::unordered_multiset",
                                    "std::unordered_multimap"))
                  .bind("recorddecl")))));

  Finder->addMatcher(
      cxxForRangeStmt(hasLoopVariable(PointerOrRefLoopVar),
                      hasRangeInit(UnorderedContainerVarRef.bind("rangeinit")))
          .bind("cxxForRangeStmt"),
      this);

  // --- Ordering algorithms applied to pointer elements ---------------------
  auto CallsOrderingAlgorithm = callee(functionDecl(hasAnyName(
      "std::is_sorted", "std::nth_element", "std::sort", "std::partial_sort",
      "std::partition", "std::stable_partition", "std::stable_sort")));

  auto FirstArgComesFromPointerHolder = hasArgument(
      0,
      cxxMemberCallExpr(on(hasType(cxxRecordDecl(has(fieldDecl(hasType(qualType(
          hasCanonicalType(pointsTo(hasCanonicalType(pointerType()))))))))))));

  Finder->addMatcher(
      callExpr(CallsOrderingAlgorithm, FirstArgComesFromPointerHolder)
          .bind("sortsemantic"),
      this);
}

/// Emits a diagnostic for a match produced by one of the registered matchers.
///
/// A matched range-based for loop (outside a macro) is reported only when the
/// first template argument of the bound container specialization is a pointer
/// type. Otherwise, a matched sorting call (outside a macro) is reported.
void NondeterministicPointerIterationOrderCheck::check(
    const MatchFinder::MatchResult &Result) {
  const auto *Loop = Result.Nodes.getNodeAs<CXXForRangeStmt>("cxxForRangeStmt");
  if (Loop && !Loop->getBeginLoc().isMacroID()) {
    // Only class template specializations carry the template arguments that
    // are inspected below.
    const auto *Container =
        Result.Nodes.getNodeAs<ClassTemplateSpecializationDecl>("recorddecl");
    if (!Container)
      return;

    // The first template argument decides whether pointers are iterated.
    if (!Container->getTemplateArgs()[0].getAsType()->isPointerType())
      return;

    const auto *Range = Result.Nodes.getNodeAs<Stmt>("rangeinit");
    const SourceRange Where = Range->getSourceRange();
    diag(Where.getBegin(), "iteration of pointers is nondeterministic")
        << Where;
    return;
  }

  const auto *SortCall = Result.Nodes.getNodeAs<Stmt>("sortsemantic");
  if (!SortCall || SortCall->getBeginLoc().isMacroID())
    return;

  const SourceRange Where = SortCall->getSourceRange();
  diag(Where.getBegin(), "sorting pointers is nondeterministic") << Where;
}

} // namespace clang::tidy::bugprone
Loading

0 comments on commit e8be3be

Please sign in to comment.