Skip to content

Commit 2411ba2

Browse files
wdx727, lifengxiang1025, and zcfh
committed
Adding Matching and Inference Functionality to Propeller-PR2: Compute basic block hash and emit to ELF.
Co-authored-by: lifengxiang1025 <[email protected]> Co-authored-by: zcfh <[email protected]>
1 parent 9a46060 commit 2411ba2

File tree

8 files changed

+372
-1
lines changed

8 files changed

+372
-1
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
//===- llvm/CodeGen/MachineBlockHashInfo.h ----------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Compute the hashes of basic blocks.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
14+
#define LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
15+
16+
#include "llvm/CodeGen/MachineFunctionPass.h"
17+
18+
namespace llvm {
19+
20+
/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while the
/// individual components are 16 bits wide each.
///
/// Bit layout of the combined value (least significant bits first):
///   [ 0, 16)  Offset
///   [16, 32)  OpcodeHash
///   [32, 48)  InstrHash
///   [48, 64)  NeighborHash
struct BlendedBlockHash {
private:
  /// Pack four 16-bit values into one 64-bit value. \p Hash1 lands in the
  /// lowest 16 bits and \p Hash4 in the highest 16 bits.
  static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3,
                                uint16_t Hash4) {
    return (static_cast<uint64_t>(Hash4) << 48) |
           (static_cast<uint64_t>(Hash3) << 32) |
           (static_cast<uint64_t>(Hash2) << 16) |
           static_cast<uint64_t>(Hash1);
  }

  /// Inverse of combineHashes(): split \p Hash into its four 16-bit fields,
  /// lowest bits going to \p Hash1 and highest bits to \p Hash4.
  static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2,
                          uint16_t &Hash3, uint16_t &Hash4) {
    Hash1 = static_cast<uint16_t>(Hash & 0xffff);
    Hash2 = static_cast<uint16_t>((Hash >> 16) & 0xffff);
    Hash3 = static_cast<uint16_t>((Hash >> 32) & 0xffff);
    Hash4 = static_cast<uint16_t>((Hash >> 48) & 0xffff);
  }

public:
  /// Construct a hash whose components are all zero.
  explicit BlendedBlockHash() = default;

  /// Construct from a value previously produced by combine().
  explicit BlendedBlockHash(uint64_t CombinedHash) {
    parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Combine the blended hash into uint64_t.
  uint64_t combine() const {
    return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
  ///
  /// Both hashes must have the same OpcodeHash (asserted). The result is
  /// ordered by significance: a NeighborHash mismatch sets bit 32, an
  /// InstrHash mismatch sets bit 16, and the absolute Offset difference
  /// occupies the low 16 bits.
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    const uint64_t NeighborMismatch = NeighborHash == BBH.NeighborHash ? 0 : 1;
    const uint64_t InstrMismatch = InstrHash == BBH.InstrHash ? 0 : 1;
    // Both operands are uint16_t, so the absolute difference fits in 16 bits
    // and cannot spill into the mismatch bits above it.
    const uint64_t OffsetDelta =
        Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset;
    return (NeighborMismatch << 32) | (InstrMismatch << 16) | OffsetDelta;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// Hash of the (loose) basic block together with (loose) hashes of its
  /// successors and predecessors.
  uint16_t NeighborHash{0};
};
99+
100+
class MachineBlockHashInfo : public MachineFunctionPass {
101+
DenseMap<unsigned, uint64_t> MBBHashInfo;
102+
103+
public:
104+
static char ID;
105+
MachineBlockHashInfo();
106+
107+
StringRef getPassName() const override { return "Basic Block Hash Compute"; }
108+
109+
void getAnalysisUsage(AnalysisUsage &AU) const override;
110+
111+
bool runOnMachineFunction(MachineFunction &F) override;
112+
113+
uint64_t getMBBHash(const MachineBasicBlock &MBB);
114+
};
115+
116+
} // end namespace llvm
117+
118+
#endif // LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
6969

7070
LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
7171

72+
/// createMachineBlockHashInfoPass - This pass computes basic block hashes.
73+
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
74+
7275
/// createMachineFunctionSplitterPass - This pass splits machine functions
7376
/// using profile information.
7477
LLVM_ABI MachineFunctionPass *createMachineFunctionSplitterPass();

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
6262
LLVM_ABI void initializeBarrierNoopPass(PassRegistry &);
6363
LLVM_ABI void initializeBasicAAWrapperPassPass(PassRegistry &);
6464
LLVM_ABI void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
65+
LLVM_ABI void initializeMachineBlockHashInfoPass(PassRegistry &);
6566
LLVM_ABI void initializeBranchFolderLegacyPass(PassRegistry &);
6667
LLVM_ABI void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
6768
LLVM_ABI void initializeBranchRelaxationLegacyPass(PassRegistry &);

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "llvm/CodeGen/GCMetadataPrinter.h"
4242
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
4343
#include "llvm/CodeGen/MachineBasicBlock.h"
44+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
4445
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
4546
#include "llvm/CodeGen/MachineConstantPool.h"
4647
#include "llvm/CodeGen/MachineDominators.h"
@@ -183,6 +184,8 @@ static cl::opt<bool> PrintLatency(
183184
cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden,
184185
cl::init(false));
185186

187+
extern cl::opt<bool> EmitBBHash;
188+
186189
STATISTIC(EmittedInsts, "Number of machine instrs printed");
187190

188191
char AsmPrinter::ID = 0;
@@ -473,6 +476,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
473476
AU.addRequired<GCModuleInfo>();
474477
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
475478
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
479+
if (EmitBBHash)
480+
AU.addRequired<MachineBlockHashInfo>();
476481
}
477482

478483
bool AsmPrinter::doInitialization(Module &M) {
@@ -1438,7 +1443,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
14381443
MF.hasBBSections() && NumMBBSectionRanges > 1,
14391444
static_cast<bool>(BBAddrMapSkipEmitBBEntries),
14401445
HasCalls,
1441-
false};
1446+
static_cast<bool>(EmitBBHash)};
14421447
}
14431448

14441449
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1497,6 +1502,9 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
14971502
PrevMBBEndSymbol = MBBSymbol;
14981503
}
14991504

1505+
auto MBHI =
1506+
Features.BBHash ? &getAnalysis<MachineBlockHashInfo>() : nullptr;
1507+
15001508
if (!Features.OmitBBEntries) {
15011509
OutStreamer->AddComment("BB id");
15021510
// Emit the BB ID for this basic block.
@@ -1524,6 +1532,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
15241532
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel);
15251533
// Emit the Metadata.
15261534
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
1535+
// Emit the Hash.
1536+
if (MBHI) {
1537+
OutStreamer->emitInt64(MBHI->getMBBHash(MBB));
1538+
}
15271539
}
15281540
PrevMBBEndSymbol = MBB.getEndSymbol();
15291541
}

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
108108
LowerEmuTLS.cpp
109109
MachineBasicBlock.cpp
110110
MachineBlockFrequencyInfo.cpp
111+
MachineBlockHashInfo.cpp
111112
MachineBlockPlacement.cpp
112113
MachineBranchProbabilityInfo.cpp
113114
MachineCFGPrinter.cpp
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
//===- llvm/CodeGen/MachineBlockHashInfo.cpp---------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Compute the hashes of basic blocks.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
14+
#include "llvm/CodeGen/Passes.h"
15+
#include "llvm/InitializePasses.h"
16+
#include "llvm/Target/TargetMachine.h"
17+
#include "llvm/CodeGen/TargetInstrInfo.h"
18+
#include "llvm/CodeGen/TargetSubtargetInfo.h"
19+
20+
using namespace llvm;
21+
22+
using InstToStringFuncTy =
23+
function_ref<std::string(const MachineInstr &)>;
24+
25+
uint64_t hashBlock(const MachineBasicBlock &MBB,
26+
InstToStringFuncTy InstToStringFunc) {
27+
std::string HashStr;
28+
for (const MachineInstr &MI : MBB) {
29+
if (MI.isMetaInstruction())
30+
continue;
31+
// Ignore terminator instruction
32+
if (MI.isTerminator())
33+
continue;
34+
HashStr.append(InstToStringFunc(MI));
35+
}
36+
return static_cast<uint64_t>(hash_value(HashStr));
37+
}
38+
39+
/// Fold a 64-bit hash into 16 bits by XOR-ing its four 16-bit lanes together.
///
/// File-local helper: declared static so it does not get external linkage.
static uint16_t hash_64_to_16(const uint64_t Hash) {
  uint16_t Res = 0;
  for (unsigned Shift = 0; Shift < 64; Shift += 16)
    Res ^= static_cast<uint16_t>((Hash >> Shift) & 0xFFFF);
  return Res;
}
47+
48+
std::string InstOpcodeToString(const MachineInstr &MI) {
49+
const TargetInstrInfo *TII = nullptr;
50+
if (const MachineBasicBlock *MBB = MI.getParent())
51+
if (const MachineFunction *MF = MBB->getParent())
52+
TII = MF->getSubtarget().getInstrInfo();
53+
if (!TII)
54+
return "UNKNOWN";
55+
return TII->getName(MI.getOpcode()).str();
56+
}
57+
58+
/// Return the text produced by MachineInstr::print() for \p MI.
///
/// File-local helper: declared static so it does not get external linkage.
static std::string InstToString(const MachineInstr &MI) {
  std::string Str;
  raw_string_ostream OS(Str);
  MI.print(OS);
  // raw_string_ostream writes straight into Str, so the string can be
  // returned directly; going through OS.str() returns a reference and forces
  // a copy.
  return Str;
}
64+
65+
INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash",
66+
"Machine Block Hash Analysis", true, true)
67+
68+
char MachineBlockHashInfo::ID = 0;
69+
70+
/// Construct the pass and make sure it is registered with the global pass
/// registry.
MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeMachineBlockHashInfoPass(Registry);
}
73+
74+
/// Declare that this pass preserves all analyses, then let the base class add
/// its own requirements.
void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  // This pass only records hashes and reports no change to the function.
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
78+
79+
/// Compute the blended hash of every basic block in \p F and record it in
/// MBBHashInfo, keyed by the block's BaseID. Blocks without a BB ID are
/// hashed (they still contribute to their neighbors' hashes) but not
/// recorded.
///
/// Always returns false: the function itself is not modified.
bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) {
  // Drop whatever an earlier run left behind so the map only ever describes
  // the blocks of F.
  MBBHashInfo.clear();

  DenseMap<MachineBasicBlock *, BlendedBlockHash> BlendedHashes;
  DenseMap<MachineBasicBlock *, uint64_t> OpcodeHashes;

  // First pass: per-block components (offset, opcode hash, instruction hash).
  // The full 64-bit opcode hash is kept aside because the neighbor hash below
  // is built from the un-truncated values.
  uint16_t Offset = 0;
  for (MachineBasicBlock &MBB : F) {
    BlendedBlockHash BlendedHash;
    // Offset of the block: running sum of the sizes of the preceding blocks,
    // kept in 16 bits.
    BlendedHash.Offset = Offset;
    Offset += MBB.size();
    // Loose hash: opcodes only.
    const uint64_t OpcodeHash = hashBlock(MBB, InstOpcodeToString);
    OpcodeHashes[&MBB] = OpcodeHash;
    BlendedHash.OpcodeHash = hash_64_to_16(OpcodeHash);
    // Strong hash: complete printed instructions.
    const uint64_t InstrHash = hashBlock(MBB, InstToString);
    BlendedHash.InstrHash = hash_64_to_16(InstrHash);
    BlendedHashes[&MBB] = BlendedHash;
  }

  // Second pass: every opcode hash is now known, so the neighbor hash can be
  // formed and the final combined value recorded.
  for (MachineBasicBlock &MBB : F) {
    uint64_t Hash = OpcodeHashes.lookup(&MBB);
    // Mix in the successors, then the predecessors, in CFG list order.
    for (MachineBasicBlock *SuccMBB : MBB.successors())
      Hash = hashing::detail::hash_16_bytes(Hash, OpcodeHashes.lookup(SuccMBB));
    for (MachineBasicBlock *PredMBB : MBB.predecessors())
      Hash = hashing::detail::hash_16_bytes(Hash, OpcodeHashes.lookup(PredMBB));

    BlendedBlockHash &BlendedHash = BlendedHashes[&MBB];
    BlendedHash.NeighborHash = hash_64_to_16(Hash);

    // Only blocks that carry a BB ID get an entry.
    if (const auto BBID = MBB.getBBID())
      MBBHashInfo[BBID->BaseID] = BlendedHash.combine();
  }

  return false;
}
124+
125+
uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) {
126+
if (MBB.getBBID()) {
127+
return MBBHashInfo[MBB.getBBID()->BaseID];
128+
}
129+
return 0;
130+
}
131+
132+
/// Factory for the basic block hash pass; the returned pass is heap-allocated
/// and handed to the caller.
MachineFunctionPass *llvm::createMachineBlockHashInfoPass() {
  MachineFunctionPass *HashInfoPass = new MachineBlockHashInfo();
  return HashInfoPass;
}

llvm/lib/CodeGen/TargetPassConfig.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,12 @@ static cl::opt<bool>
272272
cl::desc("Split static data sections into hot and cold "
273273
"sections using profile information"));
274274

275+
cl::opt<bool> EmitBBHash(
276+
"emit-bb-hash",
277+
cl::desc(
278+
"Emit the hash of basic block in the SHT_LLVM_BB_ADDR_MAP section."),
279+
cl::init(false), cl::Optional);
280+
275281
/// Allow standard passes to be disabled by command line options. This supports
276282
/// simple binary flags that either suppress the pass or do nothing.
277283
/// i.e. -disable-mypass=false has no effect.
@@ -1281,6 +1287,8 @@ void TargetPassConfig::addMachinePasses() {
12811287
// address map (or both).
12821288
if (TM->getBBSectionsType() != llvm::BasicBlockSection::None ||
12831289
TM->Options.BBAddrMap) {
1290+
if (EmitBBHash)
1291+
addPass(llvm::createMachineBlockHashInfoPass());
12841292
if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
12851293
addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
12861294
TM->getBBSectionsFuncListBuf()));

0 commit comments

Comments
 (0)