| //===- bolt/Profile/DataReader.cpp - Perf data reader ---------------------===// | 
 | // | 
 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | 
 | // See https://llvm.org/LICENSE.txt for license information. | 
 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 | // | 
 | // This family of functions reads profile data written by the perf2bolt | 
 | // utility and stores it in memory for llvm-bolt consumption. | 
 | // | 
 | //===----------------------------------------------------------------------===// | 
 |  | 
 | #include "bolt/Profile/DataReader.h" | 
 | #include "bolt/Core/BinaryFunction.h" | 
 | #include "bolt/Passes/MCF.h" | 
 | #include "bolt/Utils/Utils.h" | 
 | #include "llvm/Support/CommandLine.h" | 
 | #include "llvm/Support/Debug.h" | 
 | #include "llvm/Support/Errc.h" | 
 | #include <map> | 
 |  | 
 | #undef  DEBUG_TYPE | 
 | #define DEBUG_TYPE "bolt-prof" | 
 |  | 
 | using namespace llvm; | 
 |  | 
 | namespace opts { | 
 |  | 
 | extern cl::OptionCategory BoltCategory; | 
 | extern llvm::cl::opt<unsigned> Verbosity; | 
 |  | 
 | static cl::opt<bool> | 
 | DumpData("dump-data", | 
 |   cl::desc("dump parsed bolt data for debugging"), | 
 |   cl::Hidden, | 
 |   cl::cat(BoltCategory)); | 
 |  | 
 | } // namespace opts | 
 |  | 
 | namespace llvm { | 
 | namespace bolt { | 
 |  | 
 | namespace { | 
 |  | 
 | /// Return true if the function name can change across compilations. | 
 | bool hasVolatileName(const BinaryFunction &BF) { | 
 |   for (const StringRef &Name : BF.getNames()) | 
 |     if (getLTOCommonName(Name)) | 
 |       return true; | 
 |  | 
 |   return false; | 
 | } | 
 |  | 
 | /// Return standard escaped name of the function possibly renamed by BOLT. | 
 | std::string normalizeName(StringRef NameRef) { | 
 |   // Strip "PG." prefix used for globalized locals. | 
 |   NameRef = NameRef.starts_with("PG.") ? NameRef.substr(2) : NameRef; | 
 |   return getEscapedName(NameRef); | 
 | } | 
 |  | 
 | } // anonymous namespace | 
 |  | 
 | raw_ostream &operator<<(raw_ostream &OS, const Location &Loc) { | 
 |   if (Loc.IsSymbol) { | 
 |     OS << Loc.Name; | 
 |     if (Loc.Offset) | 
 |       OS << "+" << Twine::utohexstr(Loc.Offset); | 
 |   } else { | 
 |     OS << Twine::utohexstr(Loc.Offset); | 
 |   } | 
 |   return OS; | 
 | } | 
 |  | 
 | void FuncBranchData::appendFrom(const FuncBranchData &FBD, uint64_t Offset) { | 
 |   Data.insert(Data.end(), FBD.Data.begin(), FBD.Data.end()); | 
 |   for (auto I = Data.begin(), E = Data.end(); I != E; ++I) { | 
 |     if (I->From.Name == FBD.Name) { | 
 |       I->From.Name = this->Name; | 
 |       I->From.Offset += Offset; | 
 |     } | 
 |     if (I->To.Name == FBD.Name) { | 
 |       I->To.Name = this->Name; | 
 |       I->To.Offset += Offset; | 
 |     } | 
 |   } | 
 |   llvm::stable_sort(Data); | 
 |   ExecutionCount += FBD.ExecutionCount; | 
 |   for (auto I = FBD.EntryData.begin(), E = FBD.EntryData.end(); I != E; ++I) { | 
 |     assert(I->To.Name == FBD.Name); | 
 |     auto NewElmt = EntryData.insert(EntryData.end(), *I); | 
 |     NewElmt->To.Name = this->Name; | 
 |     NewElmt->To.Offset += Offset; | 
 |   } | 
 | } | 
 |  | 
 | uint64_t FuncBranchData::getNumExecutedBranches() const { | 
 |   uint64_t ExecutedBranches = 0; | 
 |   for (const BranchInfo &BI : Data) { | 
 |     int64_t BranchCount = BI.Branches; | 
 |     assert(BranchCount >= 0 && "branch execution count should not be negative"); | 
 |     ExecutedBranches += BranchCount; | 
 |   } | 
 |   return ExecutedBranches; | 
 | } | 
 |  | 
 | void SampleInfo::mergeWith(const SampleInfo &SI) { Hits += SI.Hits; } | 
 |  | 
 | void SampleInfo::print(raw_ostream &OS) const { | 
 |   OS << Loc.IsSymbol << " " << Loc.Name << " " << Twine::utohexstr(Loc.Offset) | 
 |      << " " << Hits << "\n"; | 
 | } | 
 |  | 
 | uint64_t FuncSampleData::getSamples(uint64_t Start, uint64_t End) const { | 
 |   assert(llvm::is_sorted(Data)); | 
 |   struct Compare { | 
 |     bool operator()(const SampleInfo &SI, const uint64_t Val) const { | 
 |       return SI.Loc.Offset < Val; | 
 |     } | 
 |     bool operator()(const uint64_t Val, const SampleInfo &SI) const { | 
 |       return Val < SI.Loc.Offset; | 
 |     } | 
 |   }; | 
 |   uint64_t Result = 0; | 
 |   for (auto I = llvm::lower_bound(Data, Start, Compare()), | 
 |             E = llvm::lower_bound(Data, End, Compare()); | 
 |        I != E; ++I) | 
 |     Result += I->Hits; | 
 |   return Result; | 
 | } | 
 |  | 
 | void FuncSampleData::bumpCount(uint64_t Offset, uint64_t Count) { | 
 |   auto Iter = Index.find(Offset); | 
 |   if (Iter == Index.end()) { | 
 |     Data.emplace_back(Location(true, Name, Offset), Count); | 
 |     Index[Offset] = Data.size() - 1; | 
 |     return; | 
 |   } | 
 |   SampleInfo &SI = Data[Iter->second]; | 
 |   SI.Hits += Count; | 
 | } | 
 |  | 
 | void FuncBranchData::bumpBranchCount(uint64_t OffsetFrom, uint64_t OffsetTo, | 
 |                                      uint64_t Count, uint64_t Mispreds) { | 
 |   auto Iter = IntraIndex[OffsetFrom].find(OffsetTo); | 
 |   if (Iter == IntraIndex[OffsetFrom].end()) { | 
 |     Data.emplace_back(Location(true, Name, OffsetFrom), | 
 |                       Location(true, Name, OffsetTo), Mispreds, Count); | 
 |     IntraIndex[OffsetFrom][OffsetTo] = Data.size() - 1; | 
 |     return; | 
 |   } | 
 |   BranchInfo &BI = Data[Iter->second]; | 
 |   BI.Branches += Count; | 
 |   BI.Mispreds += Mispreds; | 
 | } | 
 |  | 
 | void FuncBranchData::bumpCallCount(uint64_t OffsetFrom, const Location &To, | 
 |                                    uint64_t Count, uint64_t Mispreds) { | 
 |   auto Iter = InterIndex[OffsetFrom].find(To); | 
 |   if (Iter == InterIndex[OffsetFrom].end()) { | 
 |     Data.emplace_back(Location(true, Name, OffsetFrom), To, Mispreds, Count); | 
 |     InterIndex[OffsetFrom][To] = Data.size() - 1; | 
 |     return; | 
 |   } | 
 |   BranchInfo &BI = Data[Iter->second]; | 
 |   BI.Branches += Count; | 
 |   BI.Mispreds += Mispreds; | 
 | } | 
 |  | 
 | void FuncBranchData::bumpEntryCount(const Location &From, uint64_t OffsetTo, | 
 |                                     uint64_t Count, uint64_t Mispreds) { | 
 |   auto Iter = EntryIndex[OffsetTo].find(From); | 
 |   if (Iter == EntryIndex[OffsetTo].end()) { | 
 |     EntryData.emplace_back(From, Location(true, Name, OffsetTo), Mispreds, | 
 |                            Count); | 
 |     EntryIndex[OffsetTo][From] = EntryData.size() - 1; | 
 |     return; | 
 |   } | 
 |   BranchInfo &BI = EntryData[Iter->second]; | 
 |   BI.Branches += Count; | 
 |   BI.Mispreds += Mispreds; | 
 | } | 
 |  | 
 | void BranchInfo::mergeWith(const BranchInfo &BI) { | 
 |   Branches += BI.Branches; | 
 |   Mispreds += BI.Mispreds; | 
 | } | 
 |  | 
 | void BranchInfo::print(raw_ostream &OS) const { | 
 |   OS << From.IsSymbol << " " << From.Name << " " | 
 |      << Twine::utohexstr(From.Offset) << " " << To.IsSymbol << " " << To.Name | 
 |      << " " << Twine::utohexstr(To.Offset) << " " << Mispreds << " " << Branches | 
 |      << '\n'; | 
 | } | 
 |  | 
 | ErrorOr<const BranchInfo &> FuncBranchData::getBranch(uint64_t From, | 
 |                                                       uint64_t To) const { | 
 |   for (const BranchInfo &I : Data) | 
 |     if (I.From.Offset == From && I.To.Offset == To && I.From.Name == I.To.Name) | 
 |       return I; | 
 |  | 
 |   return make_error_code(llvm::errc::invalid_argument); | 
 | } | 
 |  | 
 | ErrorOr<const BranchInfo &> | 
 | FuncBranchData::getDirectCallBranch(uint64_t From) const { | 
 |   // Commented out because it can be expensive. | 
 |   // assert(std::is_sorted(Data.begin(), Data.end())); | 
 |   struct Compare { | 
 |     bool operator()(const BranchInfo &BI, const uint64_t Val) const { | 
 |       return BI.From.Offset < Val; | 
 |     } | 
 |     bool operator()(const uint64_t Val, const BranchInfo &BI) const { | 
 |       return Val < BI.From.Offset; | 
 |     } | 
 |   }; | 
 |   auto Range = std::equal_range(Data.begin(), Data.end(), From, Compare()); | 
 |   for (const auto &RI : llvm::make_range(Range)) | 
 |     if (RI.From.Name != RI.To.Name) | 
 |       return RI; | 
 |  | 
 |   return make_error_code(llvm::errc::invalid_argument); | 
 | } | 
 |  | 
 | void MemInfo::print(raw_ostream &OS) const { | 
 |   OS << (Offset.IsSymbol + 3) << " " << Offset.Name << " " | 
 |      << Twine::utohexstr(Offset.Offset) << " " << (Addr.IsSymbol + 3) << " " | 
 |      << Addr.Name << " " << Twine::utohexstr(Addr.Offset) << " " << Count | 
 |      << "\n"; | 
 | } | 
 |  | 
 | void MemInfo::prettyPrint(raw_ostream &OS) const { | 
 |   OS << "(PC: " << Offset << ", M: " << Addr << ", C: " << Count << ")"; | 
 | } | 
 |  | 
 | void FuncMemData::update(const Location &Offset, const Location &Addr) { | 
 |   auto Iter = EventIndex[Offset.Offset].find(Addr); | 
 |   if (Iter == EventIndex[Offset.Offset].end()) { | 
 |     Data.emplace_back(MemInfo(Offset, Addr, 1)); | 
 |     EventIndex[Offset.Offset][Addr] = Data.size() - 1; | 
 |     return; | 
 |   } | 
 |   ++Data[Iter->second].Count; | 
 | } | 
 |  | 
 | Error DataReader::preprocessProfile(BinaryContext &BC) { | 
 |   if (std::error_code EC = parseInput()) | 
 |     return errorCodeToError(EC); | 
 |  | 
 |   if (opts::DumpData) | 
 |     dump(); | 
 |  | 
 |   if (collectedInBoltedBinary()) | 
 |     outs() << "BOLT-INFO: profile collection done on a binary already " | 
 |               "processed by BOLT\n"; | 
 |  | 
 |   for (auto &BFI : BC.getBinaryFunctions()) { | 
 |     BinaryFunction &Function = BFI.second; | 
 |     if (FuncMemData *MemData = getMemDataForNames(Function.getNames())) { | 
 |       setMemData(Function, MemData); | 
 |       MemData->Used = true; | 
 |     } | 
 |     if (FuncBranchData *FuncData = getBranchDataForNames(Function.getNames())) { | 
 |       setBranchData(Function, FuncData); | 
 |       Function.ExecutionCount = FuncData->ExecutionCount; | 
 |       FuncData->Used = true; | 
 |     } | 
 |   } | 
 |  | 
 |   for (auto &BFI : BC.getBinaryFunctions()) { | 
 |     BinaryFunction &Function = BFI.second; | 
 |     matchProfileMemData(Function); | 
 |   } | 
 |  | 
 |   return Error::success(); | 
 | } | 
 |  | 
 | Error DataReader::readProfilePreCFG(BinaryContext &BC) { | 
 |   for (auto &BFI : BC.getBinaryFunctions()) { | 
 |     BinaryFunction &Function = BFI.second; | 
 |     FuncMemData *MemoryData = getMemData(Function); | 
 |     if (!MemoryData) | 
 |       continue; | 
 |  | 
 |     for (MemInfo &MI : MemoryData->Data) { | 
 |       const uint64_t Offset = MI.Offset.Offset; | 
 |       auto II = Function.Instructions.find(Offset); | 
 |       if (II == Function.Instructions.end()) { | 
 |         // Ignore bad instruction address. | 
 |         continue; | 
 |       } | 
 |  | 
 |       auto &MemAccessProfile = | 
 |           BC.MIB->getOrCreateAnnotationAs<MemoryAccessProfile>( | 
 |               II->second, "MemoryAccessProfile"); | 
 |       BinaryData *BD = nullptr; | 
 |       if (MI.Addr.IsSymbol) | 
 |         BD = BC.getBinaryDataByName(MI.Addr.Name); | 
 |       MemAccessProfile.AddressAccessInfo.push_back( | 
 |           {BD, MI.Addr.Offset, MI.Count}); | 
 |       auto NextII = std::next(II); | 
 |       if (NextII == Function.Instructions.end()) | 
 |         MemAccessProfile.NextInstrOffset = Function.getSize(); | 
 |       else | 
 |         MemAccessProfile.NextInstrOffset = II->first; | 
 |     } | 
 |     Function.HasMemoryProfile = true; | 
 |   } | 
 |  | 
 |   return Error::success(); | 
 | } | 
 |  | 
 | Error DataReader::readProfile(BinaryContext &BC) { | 
 |   for (auto &BFI : BC.getBinaryFunctions()) { | 
 |     BinaryFunction &Function = BFI.second; | 
 |     readProfile(Function); | 
 |   } | 
 |  | 
 |   uint64_t NumUnused = 0; | 
 |   for (const auto &KV : NamesToBranches) { | 
 |     const FuncBranchData &FBD = KV.second; | 
 |     if (!FBD.Used) | 
 |       ++NumUnused; | 
 |   } | 
 |   BC.setNumUnusedProfiledObjects(NumUnused); | 
 |  | 
 |   return Error::success(); | 
 | } | 
 |  | 
 | std::error_code DataReader::parseInput() { | 
 |   ErrorOr<std::unique_ptr<MemoryBuffer>> MB = | 
 |       MemoryBuffer::getFileOrSTDIN(Filename); | 
 |   if (std::error_code EC = MB.getError()) { | 
 |     Diag << "cannot open " << Filename << ": " << EC.message() << "\n"; | 
 |     return EC; | 
 |   } | 
 |   FileBuf = std::move(MB.get()); | 
 |   ParsingBuf = FileBuf->getBuffer(); | 
 |   if (std::error_code EC = parse()) | 
 |     return EC; | 
 |   if (!ParsingBuf.empty()) | 
 |     Diag << "WARNING: invalid profile data detected at line " << Line | 
 |          << ". Possibly corrupted profile.\n"; | 
 |  | 
 |   buildLTONameMaps(); | 
 |  | 
 |   return std::error_code(); | 
 | } | 
 |  | 
 | void DataReader::readProfile(BinaryFunction &BF) { | 
 |   if (BF.empty()) | 
 |     return; | 
 |  | 
 |   if (!hasLBR()) { | 
 |     BF.ProfileFlags = BinaryFunction::PF_SAMPLE; | 
 |     readSampleData(BF); | 
 |     return; | 
 |   } | 
 |  | 
 |   BF.ProfileFlags = BinaryFunction::PF_LBR; | 
 |  | 
 |   // Possibly assign/re-assign branch profile data. | 
 |   matchProfileData(BF); | 
 |  | 
 |   FuncBranchData *FBD = getBranchData(BF); | 
 |   if (!FBD) | 
 |     return; | 
 |  | 
 |   // Assign basic block counts to function entry points. These only include | 
 |   // counts for outside entries. | 
 |   // | 
 |   // There is a slight skew introduced here as branches originated from RETs | 
 |   // may be accounted for in the execution count of an entry block if the last | 
 |   // instruction in a predecessor fall-through block is a call. This situation | 
 |   // should rarely happen because there are few multiple-entry functions. | 
 |   for (const BranchInfo &BI : FBD->EntryData) { | 
 |     BinaryBasicBlock *BB = BF.getBasicBlockAtOffset(BI.To.Offset); | 
 |     if (BB && (BB->isEntryPoint() || BB->isLandingPad())) { | 
 |       uint64_t Count = BB->getExecutionCount(); | 
 |       if (Count == BinaryBasicBlock::COUNT_NO_PROFILE) | 
 |         Count = 0; | 
 |       BB->setExecutionCount(Count + BI.Branches); | 
 |     } | 
 |   } | 
 |  | 
 |   for (const BranchInfo &BI : FBD->Data) { | 
 |     if (BI.From.Name != BI.To.Name) | 
 |       continue; | 
 |  | 
 |     if (!recordBranch(BF, BI.From.Offset, BI.To.Offset, BI.Branches, | 
 |                       BI.Mispreds)) { | 
 |       LLVM_DEBUG(dbgs() << "bad branch : " << BI.From.Offset << " -> " | 
 |                         << BI.To.Offset << '\n'); | 
 |     } | 
 |   } | 
 |  | 
 |   // Convert branch data into annotations. | 
 |   convertBranchData(BF); | 
 | } | 
 |  | 
 | void DataReader::matchProfileData(BinaryFunction &BF) { | 
 |   // This functionality is available for LBR-mode only | 
 |   // TODO: Implement evaluateProfileData() for samples, checking whether | 
 |   // sample addresses match instruction addresses in the function | 
 |   if (!hasLBR()) | 
 |     return; | 
 |  | 
 |   FuncBranchData *FBD = getBranchData(BF); | 
 |   if (FBD) { | 
 |     BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); | 
 |     BF.RawBranchCount = FBD->getNumExecutedBranches(); | 
 |     if (BF.ProfileMatchRatio == 1.0f) { | 
 |       if (fetchProfileForOtherEntryPoints(BF)) { | 
 |         BF.ProfileMatchRatio = evaluateProfileData(BF, *FBD); | 
 |         BF.ExecutionCount = FBD->ExecutionCount; | 
 |         BF.RawBranchCount = FBD->getNumExecutedBranches(); | 
 |       } | 
 |       return; | 
 |     } | 
 |   } | 
 |  | 
 |   // Check if the function name can fluctuate between several compilations | 
 |   // possibly triggered by minor unrelated code changes in the source code | 
 |   // of the input binary. | 
 |   if (!hasVolatileName(BF)) | 
 |     return; | 
 |  | 
 |   // Check for a profile that matches with 100% confidence. | 
 |   const std::vector<FuncBranchData *> AllBranchData = | 
 |       getBranchDataForNamesRegex(BF.getNames()); | 
 |   for (FuncBranchData *NewBranchData : AllBranchData) { | 
 |     // Prevent functions from sharing the same profile. | 
 |     if (NewBranchData->Used) | 
 |       continue; | 
 |  | 
 |     if (evaluateProfileData(BF, *NewBranchData) != 1.0f) | 
 |       continue; | 
 |  | 
 |     if (FBD) | 
 |       FBD->Used = false; | 
 |  | 
 |     // Update function profile data with the new set. | 
 |     setBranchData(BF, NewBranchData); | 
 |     NewBranchData->Used = true; | 
 |     BF.ExecutionCount = NewBranchData->ExecutionCount; | 
 |     BF.ProfileMatchRatio = 1.0f; | 
 |     break; | 
 |   } | 
 | } | 
 |  | 
 | void DataReader::matchProfileMemData(BinaryFunction &BF) { | 
 |   const std::vector<FuncMemData *> AllMemData = | 
 |       getMemDataForNamesRegex(BF.getNames()); | 
 |   for (FuncMemData *NewMemData : AllMemData) { | 
 |     // Prevent functions from sharing the same profile. | 
 |     if (NewMemData->Used) | 
 |       continue; | 
 |  | 
 |     if (FuncMemData *MD = getMemData(BF)) | 
 |       MD->Used = false; | 
 |  | 
 |     // Update function profile data with the new set. | 
 |     setMemData(BF, NewMemData); | 
 |     NewMemData->Used = true; | 
 |     break; | 
 |   } | 
 | } | 
 |  | 
 | bool DataReader::fetchProfileForOtherEntryPoints(BinaryFunction &BF) { | 
 |   BinaryContext &BC = BF.getBinaryContext(); | 
 |  | 
 |   FuncBranchData *FBD = getBranchData(BF); | 
 |   if (!FBD) | 
 |     return false; | 
 |  | 
 |   // Check if we are missing profiling data for secondary entry points | 
 |   bool First = true; | 
 |   bool Updated = false; | 
 |   for (BinaryBasicBlock *BB : BF.BasicBlocks) { | 
 |     if (First) { | 
 |       First = false; | 
 |       continue; | 
 |     } | 
 |     if (BB->isEntryPoint()) { | 
 |       uint64_t EntryAddress = BB->getOffset() + BF.getAddress(); | 
 |       // Look for branch data associated with this entry point | 
 |       if (BinaryData *BD = BC.getBinaryDataAtAddress(EntryAddress)) { | 
 |         if (FuncBranchData *Data = getBranchDataForSymbols(BD->getSymbols())) { | 
 |           FBD->appendFrom(*Data, BB->getOffset()); | 
 |           Data->Used = true; | 
 |           Updated = true; | 
 |         } | 
 |       } | 
 |     } | 
 |   } | 
 |  | 
 |   return Updated; | 
 | } | 
 |  | 
 | float DataReader::evaluateProfileData(BinaryFunction &BF, | 
 |                                       const FuncBranchData &BranchData) const { | 
 |   BinaryContext &BC = BF.getBinaryContext(); | 
 |  | 
 |   // Until we define a minimal profile, we consider an empty branch data to be | 
 |   // a valid profile. It could happen to a function without branches when we | 
 |   // still have an EntryData for the execution count. | 
 |   if (BranchData.Data.empty()) | 
 |     return 1.0f; | 
 |  | 
 |   uint64_t NumMatchedBranches = 0; | 
 |   for (const BranchInfo &BI : BranchData.Data) { | 
 |     bool IsValid = false; | 
 |     if (BI.From.Name == BI.To.Name) { | 
 |       // Try to record information with 0 count. | 
 |       IsValid = recordBranch(BF, BI.From.Offset, BI.To.Offset, 0); | 
 |     } else if (collectedInBoltedBinary()) { | 
 |       // We can't check branch source for collections in bolted binaries because | 
 |       // the source of the branch may be mapped to the first instruction in a BB | 
 |       // instead of the original branch (which may not exist in the source bin). | 
 |       IsValid = true; | 
 |     } else { | 
 |       // The branch has to originate from this function. | 
 |       // Check for calls, tail calls, rets and indirect branches. | 
 |       // When matching profiling info, we did not reach the stage | 
 |       // when we identify tail calls, so they are still represented | 
 |       // by regular branch instructions and we need isBranch() here. | 
 |       MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset); | 
 |       // If it's a prefix - skip it. | 
 |       if (Instr && BC.MIB->isPrefix(*Instr)) | 
 |         Instr = BF.getInstructionAtOffset(BI.From.Offset + 1); | 
 |       if (Instr && (BC.MIB->isCall(*Instr) || BC.MIB->isBranch(*Instr) || | 
 |                     BC.MIB->isReturn(*Instr))) | 
 |         IsValid = true; | 
 |     } | 
 |  | 
 |     if (IsValid) { | 
 |       ++NumMatchedBranches; | 
 |       continue; | 
 |     } | 
 |  | 
 |     LLVM_DEBUG(dbgs() << "\tinvalid branch in " << BF << " : 0x" | 
 |                       << Twine::utohexstr(BI.From.Offset) << " -> "; | 
 |                if (BI.From.Name == BI.To.Name) dbgs() | 
 |                << "0x" << Twine::utohexstr(BI.To.Offset) << '\n'; | 
 |                else dbgs() << "<outbounds>\n";); | 
 |   } | 
 |  | 
 |   const float MatchRatio = (float)NumMatchedBranches / BranchData.Data.size(); | 
 |   if (opts::Verbosity >= 2 && NumMatchedBranches < BranchData.Data.size()) | 
 |     errs() << "BOLT-WARNING: profile branches match only " | 
 |            << format("%.1f%%", MatchRatio * 100.0f) << " (" | 
 |            << NumMatchedBranches << '/' << BranchData.Data.size() | 
 |            << ") for function " << BF << '\n'; | 
 |  | 
 |   return MatchRatio; | 
 | } | 
 |  | 
 | void DataReader::readSampleData(BinaryFunction &BF) { | 
 |   FuncSampleData *SampleDataOrErr = getFuncSampleData(BF.getNames()); | 
 |   if (!SampleDataOrErr) | 
 |     return; | 
 |  | 
 |   // Basic samples mode territory (without LBR info) | 
 |   // First step is to assign BB execution count based on samples from perf | 
 |   BF.ProfileMatchRatio = 1.0f; | 
 |   BF.removeTagsFromProfile(); | 
 |   bool NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions"); | 
 |   bool NormalizeByCalls = usesEvent("branches"); | 
 |   static bool NagUser = true; | 
 |   if (NagUser) { | 
 |     outs() | 
 |         << "BOLT-INFO: operating with basic samples profiling data (no LBR).\n"; | 
 |     if (NormalizeByInsnCount) | 
 |       outs() << "BOLT-INFO: normalizing samples by instruction count.\n"; | 
 |     else if (NormalizeByCalls) | 
 |       outs() << "BOLT-INFO: normalizing samples by branches.\n"; | 
 |  | 
 |     NagUser = false; | 
 |   } | 
 |   uint64_t LastOffset = BF.getSize(); | 
 |   uint64_t TotalEntryCount = 0; | 
 |   for (BinaryFunction::BasicBlockOffset &BBOffset : | 
 |        llvm::reverse(BF.BasicBlockOffsets)) { | 
 |     uint64_t CurOffset = BBOffset.first; | 
 |     // Always work with samples multiplied by 1000 to avoid losing them if we | 
 |     // later need to normalize numbers | 
 |     uint64_t NumSamples = | 
 |         SampleDataOrErr->getSamples(CurOffset, LastOffset) * 1000; | 
 |     if (NormalizeByInsnCount && BBOffset.second->getNumNonPseudos()) { | 
 |       NumSamples /= BBOffset.second->getNumNonPseudos(); | 
 |     } else if (NormalizeByCalls) { | 
 |       uint32_t NumCalls = BBOffset.second->getNumCalls(); | 
 |       NumSamples /= NumCalls + 1; | 
 |     } | 
 |     BBOffset.second->setExecutionCount(NumSamples); | 
 |     if (BBOffset.second->isEntryPoint()) | 
 |       TotalEntryCount += NumSamples; | 
 |     LastOffset = CurOffset; | 
 |   } | 
 |  | 
 |   BF.ExecutionCount = TotalEntryCount; | 
 |  | 
 |   estimateEdgeCounts(BF); | 
 | } | 
 |  | 
 | void DataReader::convertBranchData(BinaryFunction &BF) const { | 
 |   BinaryContext &BC = BF.getBinaryContext(); | 
 |  | 
 |   if (BF.empty()) | 
 |     return; | 
 |  | 
 |   FuncBranchData *FBD = getBranchData(BF); | 
 |   if (!FBD) | 
 |     return; | 
 |  | 
 |   // Profile information for calls. | 
 |   // | 
 |   // There are 3 cases that we annotate differently: | 
 |   //   1) Conditional tail calls that could be mispredicted. | 
 |   //   2) Indirect calls to multiple destinations with mispredictions. | 
 |   //      Before we validate CFG we have to handle indirect branches here too. | 
 |   //   3) Regular direct calls. The count could be different from containing | 
 |   //      basic block count. Keep this data in case we find it useful. | 
 |   // | 
 |   for (BranchInfo &BI : FBD->Data) { | 
 |     // Ignore internal branches. | 
 |     if (BI.To.IsSymbol && BI.To.Name == BI.From.Name && BI.To.Offset != 0) | 
 |       continue; | 
 |  | 
 |     MCInst *Instr = BF.getInstructionAtOffset(BI.From.Offset); | 
 |     if (!Instr || | 
 |         (!BC.MIB->isCall(*Instr) && !BC.MIB->isIndirectBranch(*Instr))) | 
 |       continue; | 
 |  | 
 |     auto setOrUpdateAnnotation = [&](StringRef Name, uint64_t Count) { | 
 |       if (opts::Verbosity >= 1 && BC.MIB->hasAnnotation(*Instr, Name)) | 
 |         errs() << "BOLT-WARNING: duplicate " << Name << " info for offset 0x" | 
 |                << Twine::utohexstr(BI.From.Offset) << " in function " << BF | 
 |                << '\n'; | 
 |       auto &Value = BC.MIB->getOrCreateAnnotationAs<uint64_t>(*Instr, Name); | 
 |       Value += Count; | 
 |     }; | 
 |  | 
 |     if (BC.MIB->isIndirectCall(*Instr) || BC.MIB->isIndirectBranch(*Instr)) { | 
 |       IndirectCallSiteProfile &CSP = | 
 |           BC.MIB->getOrCreateAnnotationAs<IndirectCallSiteProfile>( | 
 |               *Instr, "CallProfile"); | 
 |       MCSymbol *CalleeSymbol = nullptr; | 
 |       if (BI.To.IsSymbol) { | 
 |         if (BinaryData *BD = BC.getBinaryDataByName(BI.To.Name)) | 
 |           CalleeSymbol = BD->getSymbol(); | 
 |       } | 
 |       CSP.emplace_back(CalleeSymbol, BI.Branches, BI.Mispreds); | 
 |     } else if (BC.MIB->getConditionalTailCall(*Instr)) { | 
 |       setOrUpdateAnnotation("CTCTakenCount", BI.Branches); | 
 |       setOrUpdateAnnotation("CTCMispredCount", BI.Mispreds); | 
 |     } else { | 
 |       setOrUpdateAnnotation("Count", BI.Branches); | 
 |     } | 
 |   } | 
 | } | 
 |  | 
 | bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To, | 
 |                               uint64_t Count, uint64_t Mispreds) const { | 
 |   BinaryContext &BC = BF.getBinaryContext(); | 
 |  | 
 |   BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); | 
 |   const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); | 
 |  | 
 |   if (!FromBB || !ToBB) { | 
 |     LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n"); | 
 |     return false; | 
 |   } | 
 |  | 
 |   // Could be bad LBR data; ignore the branch. In the case of data collected | 
 |   // in binaries optimized by BOLT, a source BB may be mapped to two output | 
 |   // BBs as a result of optimizations. In that case, a branch between these | 
 |   // two will be recorded as a branch from A going to A in the source address | 
 |   // space. Keep processing. | 
 |   if (From == To) | 
 |     return true; | 
 |  | 
 |   // Return from a tail call. | 
 |   if (FromBB->succ_size() == 0) | 
 |     return true; | 
 |  | 
 |   // Very rarely we will see ignored branches. Do a linear check. | 
 |   for (std::pair<uint32_t, uint32_t> &Branch : BF.IgnoredBranches) | 
 |     if (Branch == | 
 |         std::make_pair(static_cast<uint32_t>(From), static_cast<uint32_t>(To))) | 
 |       return true; | 
 |  | 
 |   bool OffsetMatches = !!(To == ToBB->getOffset()); | 
 |   if (!OffsetMatches) { | 
 |     // Skip the nops to support old .fdata | 
 |     uint64_t Offset = ToBB->getOffset(); | 
 |     for (const MCInst &Instr : *ToBB) { | 
 |       if (!BC.MIB->isNoop(Instr)) | 
 |         break; | 
 |  | 
 |       if (std::optional<uint32_t> Size = BC.MIB->getSize(Instr)) | 
 |         Offset += *Size; | 
 |     } | 
 |  | 
 |     if (To == Offset) | 
 |       OffsetMatches = true; | 
 |   } | 
 |  | 
 |   if (!OffsetMatches) { | 
 |     // "To" could be referring to nop instructions in between 2 basic blocks. | 
 |     // While building the CFG we make sure these nops are attributed to the | 
 |     // previous basic block, thus we check if the destination belongs to the | 
 |     // gap past the last instruction. | 
 |     const MCInst *LastInstr = ToBB->getLastNonPseudoInstr(); | 
 |     if (LastInstr) { | 
 |       const uint32_t LastInstrOffset = | 
 |           BC.MIB->getOffsetWithDefault(*LastInstr, 0); | 
 |  | 
 |       // With old .fdata we are getting FT branches for "jcc,jmp" sequences. | 
 |       if (To == LastInstrOffset && BC.MIB->isUnconditionalBranch(*LastInstr)) | 
 |         return true; | 
 |  | 
 |       if (To <= LastInstrOffset) { | 
 |         LLVM_DEBUG(dbgs() << "branch recorded into the middle of the block" | 
 |                           << " in " << BF << " : " << From << " -> " << To | 
 |                           << '\n'); | 
 |         return false; | 
 |       } | 
 |     } | 
 |  | 
 |     // The real destination is the layout successor of the detected ToBB. | 
 |     if (ToBB == BF.getLayout().block_back()) | 
 |       return false; | 
 |     const BinaryBasicBlock *NextBB = | 
 |         BF.getLayout().getBlock(ToBB->getIndex() + 1); | 
 |     assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout"); | 
 |     ToBB = NextBB; | 
 |   } | 
 |  | 
 |   // If there's no corresponding instruction for 'From', we have probably | 
 |   // discarded it as a FT from __builtin_unreachable. | 
 |   MCInst *FromInstruction = BF.getInstructionAtOffset(From); | 
 |   if (!FromInstruction) { | 
 |     // If the data was collected in a bolted binary, the From addresses may be | 
 |     // translated to the first instruction of the source BB if BOLT inserted | 
 |     // a new branch that did not exist in the source (we can't map it to the | 
 |     // source instruction, so we map it to the first instr of source BB). | 
 |     // We do not keep offsets for random instructions. So the check above will | 
 |     // evaluate to true if the first instr is not a branch (call/jmp/ret/etc) | 
 |     if (collectedInBoltedBinary()) { | 
 |       if (FromBB->getInputOffset() != From) { | 
 |         LLVM_DEBUG(dbgs() << "offset " << From << " does not match a BB in " | 
 |                           << BF << '\n'); | 
 |         return false; | 
 |       } | 
 |       FromInstruction = nullptr; | 
 |     } else { | 
 |       LLVM_DEBUG(dbgs() << "no instruction for offset " << From << " in " << BF | 
 |                         << '\n'); | 
 |       return false; | 
 |     } | 
 |   } | 
 |  | 
 |   if (!FromBB->getSuccessor(ToBB->getLabel())) { | 
 |     // Check if this is a recursive call or a return from a recursive call. | 
 |     if (FromInstruction && ToBB->isEntryPoint() && | 
 |         (BC.MIB->isCall(*FromInstruction) || | 
 |          BC.MIB->isIndirectBranch(*FromInstruction))) { | 
 |       // Execution count is already accounted for. | 
 |       return true; | 
 |     } | 
 |     // For data collected in a bolted binary, we may have created two output BBs | 
 |     // that map to one original block. Branches between these two blocks will | 
 |     // appear here as one BB jumping to itself, even though it has no loop | 
 |     // edges. Ignore these. | 
 |     if (collectedInBoltedBinary() && FromBB == ToBB) | 
 |       return true; | 
 |  | 
 |     BinaryBasicBlock *FTSuccessor = FromBB->getConditionalSuccessor(false); | 
 |     if (FTSuccessor && FTSuccessor->succ_size() == 1 && | 
 |         FTSuccessor->getSuccessor(ToBB->getLabel())) { | 
 |       BinaryBasicBlock::BinaryBranchInfo &FTBI = | 
 |           FTSuccessor->getBranchInfo(*ToBB); | 
 |       FTBI.Count += Count; | 
 |       if (Count) | 
 |         FTBI.MispredictedCount += Mispreds; | 
 |       ToBB = FTSuccessor; | 
 |     } else { | 
 |       LLVM_DEBUG(dbgs() << "invalid branch in " << BF | 
 |                         << formatv(": {0:x} -> {1:x}\n", From, To)); | 
 |       return false; | 
 |     } | 
 |   } | 
 |  | 
 |   BinaryBasicBlock::BinaryBranchInfo &BI = FromBB->getBranchInfo(*ToBB); | 
 |   BI.Count += Count; | 
 |   // Only update mispredicted count if it the count was real. | 
 |   if (Count) { | 
 |     BI.MispredictedCount += Mispreds; | 
 |   } | 
 |  | 
 |   return true; | 
 | } | 
 |  | 
 | void DataReader::reportError(StringRef ErrorMsg) { | 
 |   Diag << "Error reading BOLT data input file: line " << Line << ", column " | 
 |        << Col << ": " << ErrorMsg << '\n'; | 
 | } | 
 |  | 
 | bool DataReader::expectAndConsumeFS() { | 
 |   if (ParsingBuf[0] != FieldSeparator) { | 
 |     reportError("expected field separator"); | 
 |     return false; | 
 |   } | 
 |   ParsingBuf = ParsingBuf.drop_front(1); | 
 |   Col += 1; | 
 |   return true; | 
 | } | 
 |  | 
 | void DataReader::consumeAllRemainingFS() { | 
 |   while (ParsingBuf[0] == FieldSeparator) { | 
 |     ParsingBuf = ParsingBuf.drop_front(1); | 
 |     Col += 1; | 
 |   } | 
 | } | 
 |  | 
 | bool DataReader::checkAndConsumeNewLine() { | 
 |   if (ParsingBuf[0] != '\n') | 
 |     return false; | 
 |  | 
 |   ParsingBuf = ParsingBuf.drop_front(1); | 
 |   Col = 0; | 
 |   Line += 1; | 
 |   return true; | 
 | } | 
 |  | 
 | ErrorOr<StringRef> DataReader::parseString(char EndChar, bool EndNl) { | 
 |   if (EndChar == '\\') { | 
 |     reportError("EndChar could not be backslash"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   std::string EndChars(1, EndChar); | 
 |   EndChars.push_back('\\'); | 
 |   if (EndNl) | 
 |     EndChars.push_back('\n'); | 
 |  | 
 |   size_t StringEnd = 0; | 
 |   do { | 
 |     StringEnd = ParsingBuf.find_first_of(EndChars, StringEnd); | 
 |     if (StringEnd == StringRef::npos || | 
 |         (StringEnd == 0 && ParsingBuf[StringEnd] != '\\')) { | 
 |       reportError("malformed field"); | 
 |       return make_error_code(llvm::errc::io_error); | 
 |     } | 
 |  | 
 |     if (ParsingBuf[StringEnd] != '\\') | 
 |       break; | 
 |  | 
 |     StringEnd += 2; | 
 |   } while (true); | 
 |  | 
 |   StringRef Str = ParsingBuf.substr(0, StringEnd); | 
 |  | 
 |   // If EndNl was set and nl was found instead of EndChar, do not consume the | 
 |   // new line. | 
 |   bool EndNlInsteadOfEndChar = ParsingBuf[StringEnd] == '\n' && EndChar != '\n'; | 
 |   unsigned End = EndNlInsteadOfEndChar ? StringEnd : StringEnd + 1; | 
 |  | 
 |   ParsingBuf = ParsingBuf.drop_front(End); | 
 |   if (EndChar == '\n') { | 
 |     Col = 0; | 
 |     Line += 1; | 
 |   } else { | 
 |     Col += End; | 
 |   } | 
 |   return Str; | 
 | } | 
 |  | 
 | ErrorOr<int64_t> DataReader::parseNumberField(char EndChar, bool EndNl) { | 
 |   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl); | 
 |   if (std::error_code EC = NumStrRes.getError()) | 
 |     return EC; | 
 |   StringRef NumStr = NumStrRes.get(); | 
 |   int64_t Num; | 
 |   if (NumStr.getAsInteger(10, Num)) { | 
 |     reportError("expected decimal number"); | 
 |     Diag << "Found: " << NumStr << "\n"; | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |   return Num; | 
 | } | 
 |  | 
 | ErrorOr<uint64_t> DataReader::parseHexField(char EndChar, bool EndNl) { | 
 |   ErrorOr<StringRef> NumStrRes = parseString(EndChar, EndNl); | 
 |   if (std::error_code EC = NumStrRes.getError()) | 
 |     return EC; | 
 |   StringRef NumStr = NumStrRes.get(); | 
 |   uint64_t Num; | 
 |   if (NumStr.getAsInteger(16, Num)) { | 
 |     reportError("expected hexidecimal number"); | 
 |     Diag << "Found: " << NumStr << "\n"; | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |   return Num; | 
 | } | 
 |  | 
 | ErrorOr<Location> DataReader::parseLocation(char EndChar, bool EndNl, | 
 |                                             bool ExpectMemLoc) { | 
 |   // Read whether the location of the branch should be DSO or a symbol | 
 |   // 0 means it is a DSO. 1 means it is a global symbol. 2 means it is a local | 
 |   // symbol. | 
 |   // The symbol flag is also used to tag memory load events by adding 3 to the | 
 |   // base values, i.e. 3 not a symbol, 4 global symbol and 5 local symbol. | 
 |   if (!ExpectMemLoc && ParsingBuf[0] != '0' && ParsingBuf[0] != '1' && | 
 |       ParsingBuf[0] != '2') { | 
 |     reportError("expected 0, 1 or 2"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   if (ExpectMemLoc && ParsingBuf[0] != '3' && ParsingBuf[0] != '4' && | 
 |       ParsingBuf[0] != '5') { | 
 |     reportError("expected 3, 4 or 5"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   bool IsSymbol = | 
 |       (!ExpectMemLoc && (ParsingBuf[0] == '1' || ParsingBuf[0] == '2')) || | 
 |       (ExpectMemLoc && (ParsingBuf[0] == '4' || ParsingBuf[0] == '5')); | 
 |   ParsingBuf = ParsingBuf.drop_front(1); | 
 |   Col += 1; | 
 |  | 
 |   if (!expectAndConsumeFS()) | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   consumeAllRemainingFS(); | 
 |  | 
 |   // Read the string containing the symbol or the DSO name | 
 |   ErrorOr<StringRef> NameRes = parseString(FieldSeparator); | 
 |   if (std::error_code EC = NameRes.getError()) | 
 |     return EC; | 
 |   StringRef Name = NameRes.get(); | 
 |   consumeAllRemainingFS(); | 
 |  | 
 |   // Read the offset | 
 |   ErrorOr<uint64_t> Offset = parseHexField(EndChar, EndNl); | 
 |   if (std::error_code EC = Offset.getError()) | 
 |     return EC; | 
 |  | 
 |   return Location(IsSymbol, Name, Offset.get()); | 
 | } | 
 |  | 
 | ErrorOr<BranchInfo> DataReader::parseBranchInfo() { | 
 |   ErrorOr<Location> Res = parseLocation(FieldSeparator); | 
 |   if (std::error_code EC = Res.getError()) | 
 |     return EC; | 
 |   Location From = Res.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   Res = parseLocation(FieldSeparator); | 
 |   if (std::error_code EC = Res.getError()) | 
 |     return EC; | 
 |   Location To = Res.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   ErrorOr<int64_t> MRes = parseNumberField(FieldSeparator); | 
 |   if (std::error_code EC = MRes.getError()) | 
 |     return EC; | 
 |   int64_t NumMispreds = MRes.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true); | 
 |   if (std::error_code EC = BRes.getError()) | 
 |     return EC; | 
 |   int64_t NumBranches = BRes.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   if (!checkAndConsumeNewLine()) { | 
 |     reportError("expected end of line"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   return BranchInfo(std::move(From), std::move(To), NumMispreds, NumBranches); | 
 | } | 
 |  | 
 | ErrorOr<MemInfo> DataReader::parseMemInfo() { | 
 |   ErrorOr<Location> Res = parseMemLocation(FieldSeparator); | 
 |   if (std::error_code EC = Res.getError()) | 
 |     return EC; | 
 |   Location Offset = Res.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   Res = parseMemLocation(FieldSeparator); | 
 |   if (std::error_code EC = Res.getError()) | 
 |     return EC; | 
 |   Location Addr = Res.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   ErrorOr<int64_t> CountRes = parseNumberField(FieldSeparator, true); | 
 |   if (std::error_code EC = CountRes.getError()) | 
 |     return EC; | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   if (!checkAndConsumeNewLine()) { | 
 |     reportError("expected end of line"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   return MemInfo(Offset, Addr, CountRes.get()); | 
 | } | 
 |  | 
 | ErrorOr<SampleInfo> DataReader::parseSampleInfo() { | 
 |   ErrorOr<Location> Res = parseLocation(FieldSeparator); | 
 |   if (std::error_code EC = Res.getError()) | 
 |     return EC; | 
 |   Location Address = Res.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   ErrorOr<int64_t> BRes = parseNumberField(FieldSeparator, /* EndNl = */ true); | 
 |   if (std::error_code EC = BRes.getError()) | 
 |     return EC; | 
 |   int64_t Occurrences = BRes.get(); | 
 |  | 
 |   consumeAllRemainingFS(); | 
 |   if (!checkAndConsumeNewLine()) { | 
 |     reportError("expected end of line"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   return SampleInfo(std::move(Address), Occurrences); | 
 | } | 
 |  | 
 | ErrorOr<bool> DataReader::maybeParseNoLBRFlag() { | 
 |   if (ParsingBuf.size() < 6 || ParsingBuf.substr(0, 6) != "no_lbr") | 
 |     return false; | 
 |   ParsingBuf = ParsingBuf.drop_front(6); | 
 |   Col += 6; | 
 |  | 
 |   if (ParsingBuf.size() > 0 && ParsingBuf[0] == ' ') | 
 |     ParsingBuf = ParsingBuf.drop_front(1); | 
 |  | 
 |   while (ParsingBuf.size() > 0 && ParsingBuf[0] != '\n') { | 
 |     ErrorOr<StringRef> EventName = parseString(' ', true); | 
 |     if (!EventName) | 
 |       return make_error_code(llvm::errc::io_error); | 
 |     EventNames.insert(EventName.get()); | 
 |   } | 
 |  | 
 |   if (!checkAndConsumeNewLine()) { | 
 |     reportError("malformed no_lbr line"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |   return true; | 
 | } | 
 |  | 
 | ErrorOr<bool> DataReader::maybeParseBATFlag() { | 
 |   if (ParsingBuf.size() < 16 || ParsingBuf.substr(0, 16) != "boltedcollection") | 
 |     return false; | 
 |   ParsingBuf = ParsingBuf.drop_front(16); | 
 |   Col += 16; | 
 |  | 
 |   if (!checkAndConsumeNewLine()) { | 
 |     reportError("malformed boltedcollection line"); | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |   return true; | 
 | } | 
 |  | 
 | bool DataReader::hasBranchData() { | 
 |   if (ParsingBuf.size() == 0) | 
 |     return false; | 
 |  | 
 |   if (ParsingBuf[0] == '0' || ParsingBuf[0] == '1' || ParsingBuf[0] == '2') | 
 |     return true; | 
 |   return false; | 
 | } | 
 |  | 
 | bool DataReader::hasMemData() { | 
 |   if (ParsingBuf.size() == 0) | 
 |     return false; | 
 |  | 
 |   if (ParsingBuf[0] == '3' || ParsingBuf[0] == '4' || ParsingBuf[0] == '5') | 
 |     return true; | 
 |   return false; | 
 | } | 
 |  | 
 | std::error_code DataReader::parseInNoLBRMode() { | 
 |   auto GetOrCreateFuncEntry = [&](StringRef Name) { | 
 |     auto I = NamesToSamples.find(Name); | 
 |     if (I == NamesToSamples.end()) { | 
 |       bool Success; | 
 |       std::tie(I, Success) = NamesToSamples.insert(std::make_pair( | 
 |           Name, FuncSampleData(Name, FuncSampleData::ContainerTy()))); | 
 |  | 
 |       assert(Success && "unexpected result of insert"); | 
 |     } | 
 |     return I; | 
 |   }; | 
 |  | 
 |   auto GetOrCreateFuncMemEntry = [&](StringRef Name) { | 
 |     auto I = NamesToMemEvents.find(Name); | 
 |     if (I == NamesToMemEvents.end()) { | 
 |       bool Success; | 
 |       std::tie(I, Success) = NamesToMemEvents.insert( | 
 |           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy()))); | 
 |       assert(Success && "unexpected result of insert"); | 
 |     } | 
 |     return I; | 
 |   }; | 
 |  | 
 |   while (hasBranchData()) { | 
 |     ErrorOr<SampleInfo> Res = parseSampleInfo(); | 
 |     if (std::error_code EC = Res.getError()) | 
 |       return EC; | 
 |  | 
 |     SampleInfo SI = Res.get(); | 
 |  | 
 |     // Ignore samples not involving known locations | 
 |     if (!SI.Loc.IsSymbol) | 
 |       continue; | 
 |  | 
 |     auto I = GetOrCreateFuncEntry(SI.Loc.Name); | 
 |     I->second.Data.emplace_back(std::move(SI)); | 
 |   } | 
 |  | 
 |   while (hasMemData()) { | 
 |     ErrorOr<MemInfo> Res = parseMemInfo(); | 
 |     if (std::error_code EC = Res.getError()) | 
 |       return EC; | 
 |  | 
 |     MemInfo MI = Res.get(); | 
 |  | 
 |     // Ignore memory events not involving known pc. | 
 |     if (!MI.Offset.IsSymbol) | 
 |       continue; | 
 |  | 
 |     auto I = GetOrCreateFuncMemEntry(MI.Offset.Name); | 
 |     I->second.Data.emplace_back(std::move(MI)); | 
 |   } | 
 |  | 
 |   for (auto &FuncSamples : NamesToSamples) | 
 |     llvm::stable_sort(FuncSamples.second.Data); | 
 |  | 
 |   for (auto &MemEvents : NamesToMemEvents) | 
 |     llvm::stable_sort(MemEvents.second.Data); | 
 |  | 
 |   return std::error_code(); | 
 | } | 
 |  | 
 | std::error_code DataReader::parse() { | 
 |   auto GetOrCreateFuncEntry = [&](StringRef Name) { | 
 |     auto I = NamesToBranches.find(Name); | 
 |     if (I == NamesToBranches.end()) { | 
 |       bool Success; | 
 |       std::tie(I, Success) = NamesToBranches.insert(std::make_pair( | 
 |           Name, FuncBranchData(Name, FuncBranchData::ContainerTy(), | 
 |                                FuncBranchData::ContainerTy()))); | 
 |       assert(Success && "unexpected result of insert"); | 
 |     } | 
 |     return I; | 
 |   }; | 
 |  | 
 |   auto GetOrCreateFuncMemEntry = [&](StringRef Name) { | 
 |     auto I = NamesToMemEvents.find(Name); | 
 |     if (I == NamesToMemEvents.end()) { | 
 |       bool Success; | 
 |       std::tie(I, Success) = NamesToMemEvents.insert( | 
 |           std::make_pair(Name, FuncMemData(Name, FuncMemData::ContainerTy()))); | 
 |       assert(Success && "unexpected result of insert"); | 
 |     } | 
 |     return I; | 
 |   }; | 
 |  | 
 |   Col = 0; | 
 |   Line = 1; | 
 |   ErrorOr<bool> FlagOrErr = maybeParseNoLBRFlag(); | 
 |   if (!FlagOrErr) | 
 |     return FlagOrErr.getError(); | 
 |   NoLBRMode = *FlagOrErr; | 
 |  | 
 |   ErrorOr<bool> BATFlagOrErr = maybeParseBATFlag(); | 
 |   if (!BATFlagOrErr) | 
 |     return BATFlagOrErr.getError(); | 
 |   BATMode = *BATFlagOrErr; | 
 |  | 
 |   if (!hasBranchData() && !hasMemData()) { | 
 |     Diag << "ERROR: no valid profile data found\n"; | 
 |     return make_error_code(llvm::errc::io_error); | 
 |   } | 
 |  | 
 |   if (NoLBRMode) | 
 |     return parseInNoLBRMode(); | 
 |  | 
 |   while (hasBranchData()) { | 
 |     ErrorOr<BranchInfo> Res = parseBranchInfo(); | 
 |     if (std::error_code EC = Res.getError()) | 
 |       return EC; | 
 |  | 
 |     BranchInfo BI = Res.get(); | 
 |  | 
 |     // Ignore branches not involving known location. | 
 |     if (!BI.From.IsSymbol && !BI.To.IsSymbol) | 
 |       continue; | 
 |  | 
 |     auto I = GetOrCreateFuncEntry(BI.From.Name); | 
 |     I->second.Data.emplace_back(std::move(BI)); | 
 |  | 
 |     // Add entry data for branches to another function or branches | 
 |     // to entry points (including recursive calls) | 
 |     if (BI.To.IsSymbol && | 
 |         (!BI.From.Name.equals(BI.To.Name) || BI.To.Offset == 0)) { | 
 |       I = GetOrCreateFuncEntry(BI.To.Name); | 
 |       I->second.EntryData.emplace_back(std::move(BI)); | 
 |     } | 
 |  | 
 |     // If destination is the function start - update execution count. | 
 |     // NB: the data is skewed since we cannot tell tail recursion from | 
 |     //     branches to the function start. | 
 |     if (BI.To.IsSymbol && BI.To.Offset == 0) { | 
 |       I = GetOrCreateFuncEntry(BI.To.Name); | 
 |       I->second.ExecutionCount += BI.Branches; | 
 |     } | 
 |   } | 
 |  | 
 |   while (hasMemData()) { | 
 |     ErrorOr<MemInfo> Res = parseMemInfo(); | 
 |     if (std::error_code EC = Res.getError()) | 
 |       return EC; | 
 |  | 
 |     MemInfo MI = Res.get(); | 
 |  | 
 |     // Ignore memory events not involving known pc. | 
 |     if (!MI.Offset.IsSymbol) | 
 |       continue; | 
 |  | 
 |     auto I = GetOrCreateFuncMemEntry(MI.Offset.Name); | 
 |     I->second.Data.emplace_back(std::move(MI)); | 
 |   } | 
 |  | 
 |   for (auto &FuncBranches : NamesToBranches) | 
 |     llvm::stable_sort(FuncBranches.second.Data); | 
 |  | 
 |   for (auto &MemEvents : NamesToMemEvents) | 
 |     llvm::stable_sort(MemEvents.second.Data); | 
 |  | 
 |   return std::error_code(); | 
 | } | 
 |  | 
 | void DataReader::buildLTONameMaps() { | 
 |   for (auto &FuncData : NamesToBranches) { | 
 |     const StringRef FuncName = FuncData.first; | 
 |     const std::optional<StringRef> CommonName = getLTOCommonName(FuncName); | 
 |     if (CommonName) | 
 |       LTOCommonNameMap[*CommonName].push_back(&FuncData.second); | 
 |   } | 
 |  | 
 |   for (auto &FuncData : NamesToMemEvents) { | 
 |     const StringRef FuncName = FuncData.first; | 
 |     const std::optional<StringRef> CommonName = getLTOCommonName(FuncName); | 
 |     if (CommonName) | 
 |       LTOCommonNameMemMap[*CommonName].push_back(&FuncData.second); | 
 |   } | 
 | } | 
 |  | 
 | template <typename MapTy> | 
 | static typename MapTy::mapped_type * | 
 | fetchMapEntry(MapTy &Map, const std::vector<MCSymbol *> &Symbols) { | 
 |   // Do a reverse order iteration since the name in profile has a higher chance | 
 |   // of matching a name at the end of the list. | 
 |   for (const MCSymbol *Symbol : llvm::reverse(Symbols)) { | 
 |     auto I = Map.find(normalizeName(Symbol->getName())); | 
 |     if (I != Map.end()) | 
 |       return &I->second; | 
 |   } | 
 |   return nullptr; | 
 | } | 
 |  | 
 | template <typename MapTy> | 
 | static typename MapTy::mapped_type * | 
 | fetchMapEntry(MapTy &Map, const std::vector<StringRef> &FuncNames) { | 
 |   // Do a reverse order iteration since the name in profile has a higher chance | 
 |   // of matching a name at the end of the list. | 
 |   for (StringRef Name : llvm::reverse(FuncNames)) { | 
 |     auto I = Map.find(normalizeName(Name)); | 
 |     if (I != Map.end()) | 
 |       return &I->second; | 
 |   } | 
 |   return nullptr; | 
 | } | 
 |  | 
 | template <typename MapTy> | 
 | static std::vector<typename MapTy::mapped_type *> | 
 | fetchMapEntriesRegex(MapTy &Map, | 
 |                      const StringMap<std::vector<typename MapTy::mapped_type *>> | 
 |                          <OCommonNameMap, | 
 |                      const std::vector<StringRef> &FuncNames) { | 
 |   std::vector<typename MapTy::mapped_type *> AllData; | 
 |   // Do a reverse order iteration since the name in profile has a higher chance | 
 |   // of matching a name at the end of the list. | 
 |   for (StringRef FuncName : llvm::reverse(FuncNames)) { | 
 |     std::string Name = normalizeName(FuncName); | 
 |     const std::optional<StringRef> LTOCommonName = getLTOCommonName(Name); | 
 |     if (LTOCommonName) { | 
 |       auto I = LTOCommonNameMap.find(*LTOCommonName); | 
 |       if (I != LTOCommonNameMap.end()) { | 
 |         const std::vector<typename MapTy::mapped_type *> &CommonData = | 
 |             I->second; | 
 |         AllData.insert(AllData.end(), CommonData.begin(), CommonData.end()); | 
 |       } | 
 |     } else { | 
 |       auto I = Map.find(Name); | 
 |       if (I != Map.end()) | 
 |         return {&I->second}; | 
 |     } | 
 |   } | 
 |   return AllData; | 
 | } | 
 |  | 
 | bool DataReader::mayHaveProfileData(const BinaryFunction &Function) { | 
 |   if (getBranchData(Function) || getMemData(Function)) | 
 |     return true; | 
 |  | 
 |   if (getFuncSampleData(Function.getNames()) || | 
 |       getBranchDataForNames(Function.getNames()) || | 
 |       getMemDataForNames(Function.getNames())) | 
 |     return true; | 
 |  | 
 |   if (!hasVolatileName(Function)) | 
 |     return false; | 
 |  | 
 |   const std::vector<FuncBranchData *> AllBranchData = | 
 |       getBranchDataForNamesRegex(Function.getNames()); | 
 |   if (!AllBranchData.empty()) | 
 |     return true; | 
 |  | 
 |   const std::vector<FuncMemData *> AllMemData = | 
 |       getMemDataForNamesRegex(Function.getNames()); | 
 |   if (!AllMemData.empty()) | 
 |     return true; | 
 |  | 
 |   return false; | 
 | } | 
 |  | 
 | FuncBranchData * | 
 | DataReader::getBranchDataForNames(const std::vector<StringRef> &FuncNames) { | 
 |   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, FuncNames); | 
 | } | 
 |  | 
 | FuncBranchData * | 
 | DataReader::getBranchDataForSymbols(const std::vector<MCSymbol *> &Symbols) { | 
 |   return fetchMapEntry<NamesToBranchesMapTy>(NamesToBranches, Symbols); | 
 | } | 
 |  | 
 | FuncMemData * | 
 | DataReader::getMemDataForNames(const std::vector<StringRef> &FuncNames) { | 
 |   return fetchMapEntry<NamesToMemEventsMapTy>(NamesToMemEvents, FuncNames); | 
 | } | 
 |  | 
 | FuncSampleData * | 
 | DataReader::getFuncSampleData(const std::vector<StringRef> &FuncNames) { | 
 |   return fetchMapEntry<NamesToSamplesMapTy>(NamesToSamples, FuncNames); | 
 | } | 
 |  | 
 | std::vector<FuncBranchData *> DataReader::getBranchDataForNamesRegex( | 
 |     const std::vector<StringRef> &FuncNames) { | 
 |   return fetchMapEntriesRegex(NamesToBranches, LTOCommonNameMap, FuncNames); | 
 | } | 
 |  | 
 | std::vector<FuncMemData *> | 
 | DataReader::getMemDataForNamesRegex(const std::vector<StringRef> &FuncNames) { | 
 |   return fetchMapEntriesRegex(NamesToMemEvents, LTOCommonNameMemMap, FuncNames); | 
 | } | 
 |  | 
 | bool DataReader::hasLocalsWithFileName() const { | 
 |   for (const auto &Func : NamesToBranches) { | 
 |     const StringRef &FuncName = Func.first; | 
 |     if (FuncName.count('/') == 2 && FuncName[0] != '/') | 
 |       return true; | 
 |   } | 
 |   return false; | 
 | } | 
 |  | 
 | void DataReader::dump() const { | 
 |   for (const auto &KV : NamesToBranches) { | 
 |     const StringRef Name = KV.first; | 
 |     const FuncBranchData &FBD = KV.second; | 
 |     Diag << Name << " branches:\n"; | 
 |     for (const BranchInfo &BI : FBD.Data) | 
 |       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " " | 
 |            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n"; | 
 |     Diag << Name << " entry points:\n"; | 
 |     for (const BranchInfo &BI : FBD.EntryData) | 
 |       Diag << BI.From.Name << " " << BI.From.Offset << " " << BI.To.Name << " " | 
 |            << BI.To.Offset << " " << BI.Mispreds << " " << BI.Branches << "\n"; | 
 |   } | 
 |  | 
 |   for (auto I = EventNames.begin(), E = EventNames.end(); I != E; ++I) { | 
 |     StringRef Event = I->getKey(); | 
 |     Diag << "Data was collected with event: " << Event << "\n"; | 
 |   } | 
 |   for (const auto &KV : NamesToSamples) { | 
 |     const StringRef Name = KV.first; | 
 |     const FuncSampleData &FSD = KV.second; | 
 |     Diag << Name << " samples:\n"; | 
 |     for (const SampleInfo &SI : FSD.Data) | 
 |       Diag << SI.Loc.Name << " " << SI.Loc.Offset << " " << SI.Hits << "\n"; | 
 |   } | 
 |  | 
 |   for (const auto &KV : NamesToMemEvents) { | 
 |     const StringRef Name = KV.first; | 
 |     const FuncMemData &FMD = KV.second; | 
 |     Diag << "Memory events for " << Name; | 
 |     Location LastOffset(0); | 
 |     for (const MemInfo &MI : FMD.Data) { | 
 |       if (MI.Offset == LastOffset) | 
 |         Diag << ", " << MI.Addr << "/" << MI.Count; | 
 |       else | 
 |         Diag << "\n" << MI.Offset << ": " << MI.Addr << "/" << MI.Count; | 
 |       LastOffset = MI.Offset; | 
 |     } | 
 |     Diag << "\n"; | 
 |   } | 
 | } | 
 |  | 
 | } // namespace bolt | 
 | } // namespace llvm |