https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/177361

>From 7e128fd03fc036753ef0c3a659f1b79e9303af68 Mon Sep 17 00:00:00 2001
From: nishant_sachdeva <[email protected]>
Date: Thu, 22 Jan 2026 19:07:42 +0530
Subject: [PATCH] Modifying llvm-ir2vec vocab reading pipeline to use
 Vocabulary::fromFile instead of a full pass invocation

---
 llvm/include/llvm/Analysis/IR2Vec.h    |  1 +
 llvm/lib/Analysis/IR2Vec.cpp           |  2 +-
 llvm/tools/llvm-ir2vec/lib/Utils.cpp   | 18 ++++++++++--------
 llvm/tools/llvm-ir2vec/lib/Utils.h     |  4 ++--
 llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp |  9 ++++++++-
 5 files changed, 22 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h
index 2bf1c2adc0d4f..a7f88d9efdc3d 100644
--- a/llvm/include/llvm/Analysis/IR2Vec.h
+++ b/llvm/include/llvm/Analysis/IR2Vec.h
@@ -77,6 +77,7 @@ LLVM_ABI extern cl::opt<float> OpcWeight;
 LLVM_ABI extern cl::opt<float> TypeWeight;
 LLVM_ABI extern cl::opt<float> ArgWeight;
 LLVM_ABI extern cl::opt<IR2VecKind> IR2VecEmbeddingKind;
+LLVM_ABI extern cl::opt<std::string> VocabFile;
 
 /// Embedding is a datatype that wraps std::vector<double>. It provides
 /// additional functionality for arithmetic and comparison operations.
diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp
index 4c187fe9ce804..c421926d12ab1 100644
--- a/llvm/lib/Analysis/IR2Vec.cpp
+++ b/llvm/lib/Analysis/IR2Vec.cpp
@@ -40,7 +40,7 @@ namespace ir2vec {
 cl::OptionCategory IR2VecCategory("IR2Vec Options");
 
 // FIXME: Use a default vocab when not specified
-static cl::opt<std::string>
+cl::opt<std::string>
     VocabFile("ir2vec-vocab-path", cl::Optional,
               cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""),
               cl::cat(IR2VecCategory));
diff --git a/llvm/tools/llvm-ir2vec/lib/Utils.cpp b/llvm/tools/llvm-ir2vec/lib/Utils.cpp
index a655ae069f642..c7c10ee1ff43f 100644
--- a/llvm/tools/llvm-ir2vec/lib/Utils.cpp
+++ b/llvm/tools/llvm-ir2vec/lib/Utils.cpp
@@ -41,14 +41,16 @@ namespace llvm {
 
 namespace ir2vec {
 
-bool IR2VecTool::initializeVocabulary() {
-  // Register and run the IR2Vec vocabulary analysis
-  // The vocabulary file path is specified via --ir2vec-vocab-path global
-  // option
-  MAM.registerPass([&] { return PassInstrumentationAnalysis(); });
-  MAM.registerPass([&] { return IR2VecVocabAnalysis(); });
-  // This will throw an error if vocab is not found or invalid
-  Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M);
+bool IR2VecTool::initializeVocabulary(StringRef VocabPath) {
+  auto VocabOrErr = Vocabulary::fromFile(VocabPath);
+
+  if (!VocabOrErr) {
+    llvm::errs() << "Failed to load vocabulary: "
+                 << toString(VocabOrErr.takeError()) << "\n";
+    return false;
+  }
+
+  Vocab = std::make_unique<Vocabulary>(std::move(*VocabOrErr));
   return Vocab->isValid();
 }
 
diff --git a/llvm/tools/llvm-ir2vec/lib/Utils.h b/llvm/tools/llvm-ir2vec/lib/Utils.h
index 34474b7808463..29e8ce4f1c0ad 100644
--- a/llvm/tools/llvm-ir2vec/lib/Utils.h
+++ b/llvm/tools/llvm-ir2vec/lib/Utils.h
@@ -87,13 +87,13 @@ class IR2VecTool {
 private:
   Module &M;
   ModuleAnalysisManager MAM;
-  const Vocabulary *Vocab = nullptr;
+  std::unique_ptr<Vocabulary> Vocab;
 
 public:
   explicit IR2VecTool(Module &M) : M(M) {}
 
   /// Initialize the IR2Vec vocabulary analysis
-  bool initializeVocabulary();
+  bool initializeVocabulary(StringRef VocabPath);
 
   /// Generate triplets for a single function
   /// Returns a TripletResult with:
diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
index d240e7c6e5201..e8d7d9b19cd8c 100644
--- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
+++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp
@@ -153,7 +153,14 @@ static Error processModule(Module &M, raw_ostream &OS) {
   if (EmbeddingsSubCmd) {
     // Initialize vocabulary for embedding generation
     // Note: Requires --ir2vec-vocab-path option to be set
-    auto VocabStatus = Tool.initializeVocabulary();
+    // (the path given there is stored in the VocabFile cl::opt)
+    if (VocabFile.empty()) {
+      return createStringError(
+          errc::invalid_argument,
+          "IR2Vec vocabulary file path not specified; "
+          "You may need to set it using --ir2vec-vocab-path");
+    }
+    auto VocabStatus = Tool.initializeVocabulary(VocabFile);
     assert(VocabStatus && "Failed to initialize IR2Vec vocabulary");
     (void)VocabStatus;
 

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to