-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[NFC][IR2Vec] Minor refactoring of opcode access in vocabulary #147585
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
svkeerthy
merged 1 commit into
main
from
users/svkeerthy/07-07-_nfc_ir2vec_minor_refactoring_of_opcode_access_in_vocabulary
Jul 14, 2025
Merged
[NFC][IR2Vec] Minor refactoring of opcode access in vocabulary #147585
svkeerthy
merged 1 commit into
main
from
users/svkeerthy/07-07-_nfc_ir2vec_minor_refactoring_of_opcode_access_in_vocabulary
Jul 14, 2025
+28
−22
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This was referenced Jul 8, 2025
5408737
to
80c29d9
Compare
31590b9
to
70dcd29
Compare
This was referenced Jul 9, 2025
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-mlgo Author: S. VenkataKeerthy (svkeerthy) Changes: Refactored IR2Vec vocabulary handling to improve code organization and error handling. This would help in upcoming PRs related to the IR2Vec tool. (Tracking issue - #141817) Full diff: https://github.com/llvm/llvm-project/pull/147585.diff 2 Files Affected:
diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index f5a4e450cf160..176cdaf7b5378 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -163,15 +163,18 @@ class Vocabulary {
static constexpr unsigned MaxOperandKinds =
static_cast<unsigned>(OperandKind::MaxOperandKind);
+ /// Helper function to get vocabulary key for a given Opcode
+ static StringRef getVocabKeyForOpcode(unsigned Opcode);
+
+ /// Helper function to get vocabulary key for a given TypeID
+ static StringRef getVocabKeyForTypeID(Type::TypeID TypeID);
+
/// Helper function to get vocabulary key for a given OperandKind
static StringRef getVocabKeyForOperandKind(OperandKind Kind);
/// Helper function to classify an operand into OperandKind
static OperandKind getOperandKind(const Value *Op);
- /// Helper function to get vocabulary key for a given TypeID
- static StringRef getVocabKeyForTypeID(Type::TypeID TypeID);
-
public:
Vocabulary() = default;
Vocabulary(VocabVector &&Vocab);
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index d3dc2e36fd23e..f97644b93a3d4 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -243,6 +243,17 @@ const ir2vec::Embedding &Vocabulary::operator[](const Value *Arg) const {
return Vocab[MaxOpcodes + MaxTypeIDs + static_cast<unsigned>(ArgKind)];
}
+StringRef Vocabulary::getVocabKeyForOpcode(unsigned Opcode) {
+ assert(Opcode >= 1 && Opcode <= MaxOpcodes && "Invalid opcode");
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ if (Opcode == NUM) { \
+ return #OPCODE; \
+ }
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_INST
+ return "UnknownOpcode";
+}
+
StringRef Vocabulary::getVocabKeyForTypeID(Type::TypeID TypeID) {
switch (TypeID) {
case Type::VoidTyID:
@@ -280,6 +291,7 @@ StringRef Vocabulary::getVocabKeyForTypeID(Type::TypeID TypeID) {
default:
return "UnknownTy";
}
+ return "UnknownTy";
}
// Operand kinds supported by IR2Vec - string mappings
@@ -297,9 +309,9 @@ StringRef Vocabulary::getVocabKeyForOperandKind(Vocabulary::OperandKind Kind) {
OPERAND_KINDS
#undef OPERAND_KIND
case Vocabulary::OperandKind::MaxOperandKind:
- llvm_unreachable("Invalid OperandKind");
+ return "UnknownOperand";
}
- llvm_unreachable("Unknown OperandKind");
+ return "UnknownOperand";
}
#undef OPERAND_KINDS
@@ -332,14 +344,8 @@ StringRef Vocabulary::getStringKey(unsigned Pos) {
assert(Pos < MaxOpcodes + MaxTypeIDs + MaxOperandKinds &&
"Position out of bounds in vocabulary");
// Opcode
- if (Pos < MaxOpcodes) {
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- if (Pos == NUM - 1) { \
- return #OPCODE; \
- }
-#include "llvm/IR/Instruction.def"
-#undef HANDLE_INST
- }
+ if (Pos < MaxOpcodes)
+ return getVocabKeyForOpcode(Pos + 1);
// Type
if (Pos < MaxOpcodes + MaxTypeIDs)
return getVocabKeyForTypeID(static_cast<Type::TypeID>(Pos - MaxOpcodes));
@@ -447,21 +453,18 @@ void IR2VecVocabAnalysis::generateNumMappedVocab() {
// Handle Opcodes
std::vector<Embedding> NumericOpcodeEmbeddings(Vocabulary::MaxOpcodes,
Embedding(Dim, 0));
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- { \
- auto It = OpcVocab.find(#OPCODE); \
- if (It != OpcVocab.end()) \
- NumericOpcodeEmbeddings[NUM - 1] = It->second; \
- else \
- handleMissingEntity(#OPCODE); \
+ for (unsigned Opcode : seq(0u, Vocabulary::MaxOpcodes)) {
+ StringRef VocabKey = Vocabulary::getVocabKeyForOpcode(Opcode + 1);
+ auto It = OpcVocab.find(VocabKey.str());
+ if (It != OpcVocab.end())
+ NumericOpcodeEmbeddings[Opcode] = It->second;
+ else
+ handleMissingEntity(VocabKey.str());
}
-#include "llvm/IR/Instruction.def"
-#undef HANDLE_INST
Vocab.insert(Vocab.end(), NumericOpcodeEmbeddings.begin(),
NumericOpcodeEmbeddings.end());
- // Handle Types using direct iteration through TypeID enum
- // We iterate through all possible TypeID values and map them to embeddings
+ // Handle Types
std::vector<Embedding> NumericTypeEmbeddings(Vocabulary::MaxTypeIDs,
Embedding(Dim, 0));
for (unsigned TypeID : seq(0u, Vocabulary::MaxTypeIDs)) {
|
70dcd29
to
237e4d2
Compare
80c29d9
to
5eaecce
Compare
bebdb9e
to
28b3901
Compare
Merge activity
|
28b3901
to
ee12344
Compare
ee12344
to
a466c28
Compare
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Refactored IR2Vec vocabulary handling to improve code organization and error handling. This would help in upcoming PRs related to the IR2Vec tool.
(Tracking issue - #141817)