Skip to content

Commit 2c908bd

Browse files
committed
Adding Matching and Inference Functionality to Propeller-PR1: Emit basic block hash.
1 parent f6fc5e8 commit 2c908bd

22 files changed

+495
-23
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#ifndef LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
2+
#define LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H
3+
4+
#include "llvm/CodeGen/MachineFunctionPass.h"
5+
6+
namespace llvm {
7+
8+
/// An object wrapping several components of a basic block hash. The combined
/// (blended) hash is represented and stored as one uint64_t, while the
/// individual components are 16 bits wide each.
///
/// Packed layout, from the least to the most significant 16 bits:
///   Offset | OpcodeHash | InstrHash | NeighborHash
struct BlendedBlockHash {
private:
  /// Packs four 16-bit values into one 64-bit value. \p Hash1 lands in the
  /// lowest 16 bits and \p Hash4 in the highest 16 bits.
  static uint64_t combineHashes(uint16_t Hash1, uint16_t Hash2, uint16_t Hash3,
                                uint16_t Hash4) {
    return (uint64_t(Hash4) << 48) | (uint64_t(Hash3) << 32) |
           (uint64_t(Hash2) << 16) | uint64_t(Hash1);
  }

  /// Inverse of combineHashes(): splits \p Hash into its four 16-bit pieces,
  /// lowest 16 bits first.
  static void parseHashes(uint64_t Hash, uint16_t &Hash1, uint16_t &Hash2,
                          uint16_t &Hash3, uint16_t &Hash4) {
    Hash1 = static_cast<uint16_t>(Hash & 0xffff);
    Hash2 = static_cast<uint16_t>((Hash >> 16) & 0xffff);
    Hash3 = static_cast<uint16_t>((Hash >> 32) & 0xffff);
    Hash4 = static_cast<uint16_t>((Hash >> 48) & 0xffff);
  }

public:
  /// Creates a hash whose components are all zero.
  explicit BlendedBlockHash() = default;

  /// Unpacks a value previously produced by combine().
  explicit BlendedBlockHash(uint64_t CombinedHash) {
    parseHashes(CombinedHash, Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Combine the blended hash into uint64_t.
  uint64_t combine() const {
    return combineHashes(Offset, OpcodeHash, InstrHash, NeighborHash);
  }

  /// Compute a distance between two given blended hashes. The smaller the
  /// distance, the more similar two blocks are. For identical basic blocks,
  /// the distance is zero.
  ///
  /// Both hashes must have the same OpcodeHash (asserted). A NeighborHash
  /// mismatch contributes 1 << 32, an InstrHash mismatch contributes 1 << 16,
  /// and the absolute Offset difference fills the low 16 bits.
  uint64_t distance(const BlendedBlockHash &BBH) const {
    assert(OpcodeHash == BBH.OpcodeHash &&
           "incorrect blended hash distance computation");
    const uint64_t NeighborDiff = NeighborHash == BBH.NeighborHash ? 0 : 1;
    const uint64_t InstrDiff = InstrHash == BBH.InstrHash ? 0 : 1;
    // The operands are promoted to int, so the subtraction cannot wrap; the
    // result always fits in 16 bits.
    const uint64_t OffsetDiff =
        Offset >= BBH.Offset ? Offset - BBH.Offset : BBH.Offset - Offset;
    return (NeighborDiff << 32) | (InstrDiff << 16) | OffsetDiff;
  }

  /// The offset of the basic block from the function start.
  uint16_t Offset{0};
  /// (Loose) Hash of the basic block instructions, excluding operands.
  uint16_t OpcodeHash{0};
  /// (Strong) Hash of the basic block instructions, including opcodes and
  /// operands.
  uint16_t InstrHash{0};
  /// Hash of the (loose) basic block together with (loose) hashes of its
  /// successors and predecessors.
  uint16_t NeighborHash{0};
};
87+
88+
class MachineBlockHashInfo : public MachineFunctionPass {
89+
DenseMap<unsigned, uint64_t> MBBHashInfo;
90+
91+
public:
92+
static char ID;
93+
MachineBlockHashInfo();
94+
95+
StringRef getPassName() const override { return "Basic Block Hash Compute"; }
96+
97+
void getAnalysisUsage(AnalysisUsage &AU) const override;
98+
99+
bool runOnMachineFunction(MachineFunction &F) override;
100+
101+
uint64_t getMBBHash(const MachineBasicBlock &MBB);
102+
};
103+
104+
} // end namespace llvm
105+
106+
#endif // LLVM_CODEGEN_MACHINEBLOCKHASHINFO_H

llvm/include/llvm/CodeGen/Passes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ LLVM_ABI MachineFunctionPass *createBasicBlockSectionsPass();
6969

7070
LLVM_ABI MachineFunctionPass *createBasicBlockPathCloningPass();
7171

72+
/// createMachineBlockHashInfoPass - This pass computes basic block hashes.
73+
LLVM_ABI MachineFunctionPass *createMachineBlockHashInfoPass();
74+
7275
/// createMachineFunctionSplitterPass - This pass splits machine functions
7376
/// using profile information.
7477
LLVM_ABI MachineFunctionPass *createMachineFunctionSplitterPass();

llvm/include/llvm/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ LLVM_ABI void initializeBasicBlockSectionsPass(PassRegistry &);
6262
LLVM_ABI void initializeBarrierNoopPass(PassRegistry &);
6363
LLVM_ABI void initializeBasicAAWrapperPassPass(PassRegistry &);
6464
LLVM_ABI void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
65+
LLVM_ABI void initializeMachineBlockHashInfoPass(PassRegistry &);
6566
LLVM_ABI void initializeBranchFolderLegacyPass(PassRegistry &);
6667
LLVM_ABI void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
6768
LLVM_ABI void initializeBranchRelaxationLegacyPass(PassRegistry &);

llvm/include/llvm/MC/MCContext.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class MCContext {
175175
unsigned GetInstance(unsigned LocalLabelVal);
176176

177177
/// SHT_LLVM_BB_ADDR_MAP version to emit.
178-
uint8_t BBAddrMapVersion = 3;
178+
uint8_t BBAddrMapVersion = 4;
179179

180180
/// The file name of the log file from the environment variable
181181
/// AS_SECURE_LOG_FILE. Which must be set before the .secure_log_unique

llvm/include/llvm/Object/ELFTypes.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,7 @@ struct BBAddrMap {
833833
bool MultiBBRange : 1;
834834
bool OmitBBEntries : 1;
835835
bool CallsiteEndOffsets : 1;
836+
bool BBHash : 1;
836837

837838
bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; }
838839

@@ -845,7 +846,8 @@ struct BBAddrMap {
845846
(static_cast<uint8_t>(BrProb) << 2) |
846847
(static_cast<uint8_t>(MultiBBRange) << 3) |
847848
(static_cast<uint8_t>(OmitBBEntries) << 4) |
848-
(static_cast<uint8_t>(CallsiteEndOffsets) << 5);
849+
(static_cast<uint8_t>(CallsiteEndOffsets) << 5) |
850+
(static_cast<uint8_t>(BBHash) << 6);
849851
}
850852

851853
// Decodes from minimum bit width representation and validates no
@@ -854,7 +856,8 @@ struct BBAddrMap {
854856
Features Feat{
855857
static_cast<bool>(Val & (1 << 0)), static_cast<bool>(Val & (1 << 1)),
856858
static_cast<bool>(Val & (1 << 2)), static_cast<bool>(Val & (1 << 3)),
857-
static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5))};
859+
static_cast<bool>(Val & (1 << 4)), static_cast<bool>(Val & (1 << 5)),
860+
static_cast<bool>(Val & (1 << 6))};
858861
if (Feat.encode() != Val)
859862
return createStringError(
860863
std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x",
@@ -864,10 +867,10 @@ struct BBAddrMap {
864867

865868
bool operator==(const Features &Other) const {
866869
return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange,
867-
OmitBBEntries, CallsiteEndOffsets) ==
870+
OmitBBEntries, CallsiteEndOffsets, BBHash) ==
868871
std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb,
869872
Other.MultiBBRange, Other.OmitBBEntries,
870-
Other.CallsiteEndOffsets);
873+
Other.CallsiteEndOffsets, Other.BBHash);
871874
}
872875
};
873876

@@ -920,17 +923,19 @@ struct BBAddrMap {
920923
false}; // Metadata for this basic block.
921924
// Offsets of end of call instructions, relative to the basic block start.
922925
SmallVector<uint32_t, 1> CallsiteEndOffsets;
926+
uint64_t Hash = 0; // Hash for this basic block.
923927

924928
BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD,
925-
SmallVector<uint32_t, 1> CallsiteEndOffsets)
929+
SmallVector<uint32_t, 1> CallsiteEndOffsets, uint64_t Hash = 0)
926930
: ID(ID), Offset(Offset), Size(Size), MD(MD),
927-
CallsiteEndOffsets(std::move(CallsiteEndOffsets)) {}
931+
CallsiteEndOffsets(std::move(CallsiteEndOffsets)), Hash(Hash) {}
928932

929933
UniqueBBID getID() const { return {ID, 0}; }
930934

931935
bool operator==(const BBEntry &Other) const {
932936
return ID == Other.ID && Offset == Other.Offset && Size == Other.Size &&
933-
MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets;
937+
MD == Other.MD && CallsiteEndOffsets == Other.CallsiteEndOffsets &&
938+
Hash == Other.Hash;
934939
}
935940

936941
bool hasReturn() const { return MD.HasReturn; }

llvm/include/llvm/ObjectYAML/ELFYAML.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ struct BBAddrMapEntry {
163163
llvm::yaml::Hex64 Size;
164164
llvm::yaml::Hex64 Metadata;
165165
std::optional<std::vector<llvm::yaml::Hex64>> CallsiteEndOffsets;
166+
std::optional<llvm::yaml::Hex64> Hash;
166167
};
167168
uint8_t Version;
168169
llvm::yaml::Hex8 Feature;

llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "llvm/CodeGen/GCMetadataPrinter.h"
4141
#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
4242
#include "llvm/CodeGen/MachineBasicBlock.h"
43+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
4344
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
4445
#include "llvm/CodeGen/MachineConstantPool.h"
4546
#include "llvm/CodeGen/MachineDominators.h"
@@ -182,6 +183,8 @@ static cl::opt<bool> PrintLatency(
182183
cl::desc("Print instruction latencies as verbose asm comments"), cl::Hidden,
183184
cl::init(false));
184185

186+
extern cl::opt<bool> EmitBBHash;
187+
185188
STATISTIC(EmittedInsts, "Number of machine instrs printed");
186189

187190
char AsmPrinter::ID = 0;
@@ -461,6 +464,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
461464
AU.addRequired<GCModuleInfo>();
462465
AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
463466
AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
467+
if (EmitBBHash)
468+
AU.addRequired<MachineBlockHashInfo>();
464469
}
465470

466471
bool AsmPrinter::doInitialization(Module &M) {
@@ -1427,7 +1432,8 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges,
14271432
BrProbEnabled,
14281433
MF.hasBBSections() && NumMBBSectionRanges > 1,
14291434
static_cast<bool>(BBAddrMapSkipEmitBBEntries),
1430-
HasCalls};
1435+
HasCalls,
1436+
static_cast<bool>(EmitBBHash)};
14311437
}
14321438

14331439
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1486,6 +1492,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
14861492
PrevMBBEndSymbol = MBBSymbol;
14871493
}
14881494

1495+
auto MBHI = Features.BBHash ? &getAnalysis<MachineBlockHashInfo>() : nullptr;
1496+
14891497
if (!Features.OmitBBEntries) {
14901498
OutStreamer->AddComment("BB id");
14911499
// Emit the BB ID for this basic block.
@@ -1513,6 +1521,10 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
15131521
emitLabelDifferenceAsULEB128(MBB.getEndSymbol(), CurrentLabel);
15141522
// Emit the Metadata.
15151523
OutStreamer->emitULEB128IntValue(getBBAddrMapMetadata(MBB));
1524+
// Emit the Hash.
1525+
if (MBHI) {
1526+
OutStreamer->emitULEB128IntValue(MBHI->getMBBHash(MBB));
1527+
}
15161528
}
15171529
PrevMBBEndSymbol = MBB.getEndSymbol();
15181530
}

llvm/lib/CodeGen/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
108108
LowerEmuTLS.cpp
109109
MachineBasicBlock.cpp
110110
MachineBlockFrequencyInfo.cpp
111+
MachineBlockHashInfo.cpp
111112
MachineBlockPlacement.cpp
112113
MachineBranchProbabilityInfo.cpp
113114
MachineCFGPrinter.cpp
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#include "llvm/CodeGen/MachineBlockHashInfo.h"
2+
#include "llvm/CodeGen/Passes.h"
3+
#include "llvm/InitializePasses.h"
4+
#include "llvm/Target/TargetMachine.h"
5+
6+
using namespace llvm;
7+
8+
using OperandHashFuncTy =
9+
function_ref<uint64_t(uint64_t &, const MachineOperand &)>;
10+
11+
/// Folds the instructions of \p MBB into a single 64-bit hash.
///
/// Meta instructions and terminators are skipped. For every remaining
/// instruction the opcode is mixed in first, then each operand is passed, in
/// order, to \p OperandHashFunc together with the running hash; the callback's
/// return value becomes the new running hash.
///
/// \returns the accumulated hash; 0 if no instruction contributed.
static uint64_t hashBlock(const MachineBasicBlock &MBB,
                          OperandHashFuncTy OperandHashFunc) {
  uint64_t Hash = 0;
  for (const MachineInstr &MI : MBB) {
    if (MI.isMetaInstruction() || MI.isTerminator())
      continue;
    Hash = hashing::detail::hash_16_bytes(Hash, MI.getOpcode());
    for (const MachineOperand &Operand : MI.operands())
      Hash = OperandHashFunc(Hash, Operand);
  }
  return Hash;
}
27+
28+
/// Hashing a 64-bit integer to a 16-bit one by XOR-ing its four 16-bit
/// quarters together.
static uint16_t hash_64_to_16(const uint64_t Hash) {
  uint16_t Res = 0;
  for (unsigned Shift = 0; Shift < 64; Shift += 16)
    Res ^= static_cast<uint16_t>(Hash >> Shift);
  return Res;
}
36+
37+
/// Operand callback for hashBlock(): mixes the hash of \p Operand into the
/// running \p Hash and returns the result. \p Hash itself is not modified.
///
/// NOTE(review): hash_value(MachineOperand) is defined outside this file;
/// confirm that it yields the same value across separate compiler invocations
/// before relying on the result for cross-build matching.
static uint64_t hashInstOperand(uint64_t &Hash, const MachineOperand &Operand) {
  return hashing::detail::hash_16_bytes(Hash, hash_value(Operand));
}
40+
41+
// The hashes are derived from instruction opcodes and operands, not only from
// the CFG, so the pass is registered as a non-CFG-only analysis. Otherwise a
// pass that merely preserves the CFG would be treated as preserving hashes it
// may have invalidated.
INITIALIZE_PASS(MachineBlockHashInfo, "machine-block-hash",
                "Machine Block Hash Analysis", false, true)

char MachineBlockHashInfo::ID = 0;
45+
46+
/// Constructs the pass and registers it with the global pass registry.
MachineBlockHashInfo::MachineBlockHashInfo() : MachineFunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeMachineBlockHashInfoPass(Registry);
}
49+
50+
/// Declares that this pass preserves every other analysis, then lets the base
/// class add its own requirements to \p AU.
void MachineBlockHashInfo::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
54+
55+
/// Computes a BlendedBlockHash for every basic block of \p F and stores the
/// combined 64-bit value in MBBHashInfo under the block's BaseID. Blocks
/// without a BBID are hashed (they still feed their neighbours' hashes) but
/// are not stored.
///
/// \returns false; \p F is only inspected.
bool MachineBlockHashInfo::runOnMachineFunction(MachineFunction &F) {
  // The map is keyed by BaseID only, so entries left over from a previously
  // processed function would otherwise linger.
  MBBHashInfo.clear();

  DenseMap<const MachineBasicBlock *, BlendedBlockHash> BlendedHashes;
  DenseMap<const MachineBasicBlock *, uint64_t> OpcodeHashes;

  // Pass 1: per-block components that do not depend on other blocks.
  // Offset is a running sum of MBB.size() kept in 16 bits, so it wraps modulo
  // 2^16 for very large functions.
  // NOTE(review): presumably MBB.size() also counts the meta instructions
  // that hashBlock() skips; confirm that is intended.
  uint16_t Offset = 0;
  for (const MachineBasicBlock &MBB : F) {
    BlendedBlockHash BlendedHash;
    BlendedHash.Offset = Offset;
    Offset += MBB.size();

    // Loose hash: opcodes only, operands leave the running hash untouched.
    const uint64_t OpcodeHash = hashBlock(
        MBB, [](uint64_t &Hash, const MachineOperand &) { return Hash; });
    OpcodeHashes[&MBB] = OpcodeHash;
    BlendedHash.OpcodeHash = hash_64_to_16(OpcodeHash);

    // Strong hash: opcodes and operands.
    BlendedHash.InstrHash = hash_64_to_16(hashBlock(MBB, hashInstOperand));

    BlendedHashes[&MBB] = BlendedHash;
  }

  // Pass 2: neighbor hash = own loose hash, then the loose hashes of all
  // successors, then those of all predecessors, in iteration order.
  for (const MachineBasicBlock &MBB : F) {
    uint64_t Hash = OpcodeHashes.lookup(&MBB);
    for (const MachineBasicBlock *SuccMBB : MBB.successors())
      Hash = hashing::detail::hash_16_bytes(Hash, OpcodeHashes.lookup(SuccMBB));
    for (const MachineBasicBlock *PredMBB : MBB.predecessors())
      Hash = hashing::detail::hash_16_bytes(Hash, OpcodeHashes.lookup(PredMBB));
    BlendedHashes[&MBB].NeighborHash = hash_64_to_16(Hash);
  }

  // Pass 3: publish the combined hash for every block that carries a BBID.
  for (const MachineBasicBlock &MBB : F) {
    if (const auto BBID = MBB.getBBID())
      MBBHashInfo[BBID->BaseID] = BlendedHashes[&MBB].combine();
  }

  return false;
}
101+
102+
/// Returns the combined hash stored for \p MBB.
///
/// \returns 0 when \p MBB has no BBID or when no hash is stored for its
/// BaseID. The lookup does not insert into the map.
uint64_t MachineBlockHashInfo::getMBBHash(const MachineBasicBlock &MBB) {
  if (const auto BBID = MBB.getBBID())
    return MBBHashInfo.lookup(BBID->BaseID);
  return 0;
}
108+
109+
/// Factory for the basic block hash pass. Returns a newly allocated
/// MachineBlockHashInfo.
MachineFunctionPass *llvm::createMachineBlockHashInfoPass() {
  MachineFunctionPass *HashInfoPass = new MachineBlockHashInfo();
  return HashInfoPass;
}

0 commit comments

Comments
 (0)