https://github.com/nishant-sachdeva updated https://github.com/llvm/llvm-project/pull/177361
>From 7e128fd03fc036753ef0c3a659f1b79e9303af68 Mon Sep 17 00:00:00 2001 From: nishant_sachdeva <[email protected]> Date: Thu, 22 Jan 2026 19:07:42 +0530 Subject: [PATCH] Modifying llvm-ir2vec vocab reading pipeline to use Vocabulary::fromFile instead of a full pass invocation --- llvm/include/llvm/Analysis/IR2Vec.h | 1 + llvm/lib/Analysis/IR2Vec.cpp | 2 +- llvm/tools/llvm-ir2vec/lib/Utils.cpp | 18 ++++++++++-------- llvm/tools/llvm-ir2vec/lib/Utils.h | 4 ++-- llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp | 9 ++++++++- 5 files changed, 22 insertions(+), 12 deletions(-) diff --git a/llvm/include/llvm/Analysis/IR2Vec.h b/llvm/include/llvm/Analysis/IR2Vec.h index 2bf1c2adc0d4f..a7f88d9efdc3d 100644 --- a/llvm/include/llvm/Analysis/IR2Vec.h +++ b/llvm/include/llvm/Analysis/IR2Vec.h @@ -77,6 +77,7 @@ LLVM_ABI extern cl::opt<float> OpcWeight; LLVM_ABI extern cl::opt<float> TypeWeight; LLVM_ABI extern cl::opt<float> ArgWeight; LLVM_ABI extern cl::opt<IR2VecKind> IR2VecEmbeddingKind; +LLVM_ABI extern cl::opt<std::string> VocabFile; /// Embedding is a datatype that wraps std::vector<double>. It provides /// additional functionality for arithmetic and comparison operations. diff --git a/llvm/lib/Analysis/IR2Vec.cpp b/llvm/lib/Analysis/IR2Vec.cpp index 4c187fe9ce804..c421926d12ab1 100644 --- a/llvm/lib/Analysis/IR2Vec.cpp +++ b/llvm/lib/Analysis/IR2Vec.cpp @@ -40,7 +40,7 @@ namespace ir2vec { cl::OptionCategory IR2VecCategory("IR2Vec Options"); // FIXME: Use a default vocab when not specified -static cl::opt<std::string> +cl::opt<std::string> VocabFile("ir2vec-vocab-path", cl::Optional, cl::desc("Path to the vocabulary file for IR2Vec"), cl::init(""), cl::cat(IR2VecCategory)); diff --git a/llvm/tools/llvm-ir2vec/lib/Utils.cpp b/llvm/tools/llvm-ir2vec/lib/Utils.cpp index a655ae069f642..c7c10ee1ff43f 100644 --- a/llvm/tools/llvm-ir2vec/lib/Utils.cpp +++ b/llvm/tools/llvm-ir2vec/lib/Utils.cpp @@ -41,14 +41,16 @@ namespace llvm { namespace ir2vec { -bool IR2VecTool::initializeVocabulary() { - // Register and run the IR2Vec vocabulary analysis - // The vocabulary file path is specified via --ir2vec-vocab-path global - // option - MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); - MAM.registerPass([&] { return IR2VecVocabAnalysis(); }); - // This will throw an error if vocab is not found or invalid - Vocab = &MAM.getResult<IR2VecVocabAnalysis>(M); +bool IR2VecTool::initializeVocabulary(StringRef VocabPath) { + auto VocabOrErr = Vocabulary::fromFile(VocabPath); + + if (!VocabOrErr) { + llvm::errs() << "Failed to load vocabulary: " + << toString(VocabOrErr.takeError()) << "\n"; + return false; + } + + Vocab = std::make_unique<Vocabulary>(std::move(*VocabOrErr)); return Vocab->isValid(); } diff --git a/llvm/tools/llvm-ir2vec/lib/Utils.h b/llvm/tools/llvm-ir2vec/lib/Utils.h index 34474b7808463..29e8ce4f1c0ad 100644 --- a/llvm/tools/llvm-ir2vec/lib/Utils.h +++ b/llvm/tools/llvm-ir2vec/lib/Utils.h @@ -87,13 +87,13 @@ class IR2VecTool { private: Module &M; ModuleAnalysisManager MAM; - const Vocabulary *Vocab = nullptr; + std::unique_ptr<Vocabulary> Vocab; public: explicit IR2VecTool(Module &M) : M(M) {} /// Initialize the IR2Vec vocabulary analysis - bool initializeVocabulary(); + bool initializeVocabulary(StringRef VocabPath); /// Generate triplets for a single function /// Returns a TripletResult with: diff --git a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp index d240e7c6e5201..e8d7d9b19cd8c 100644 --- a/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp +++ b/llvm/tools/llvm-ir2vec/llvm-ir2vec.cpp @@ -153,7 +153,14 @@ static Error processModule(Module &M, raw_ostream &OS) { if (EmbeddingsSubCmd) { // Initialize vocabulary for embedding generation // Note: Requires --ir2vec-vocab-path option to be set - auto VocabStatus = Tool.initializeVocabulary(); + // and this value will be populated in the var VocabFile + if (VocabFile.empty()) { + return createStringError( + errc::invalid_argument, + "IR2Vec vocabulary file path not specified; " + "You may need to set it using --ir2vec-vocab-path"); + } + auto VocabStatus = Tool.initializeVocabulary(VocabFile); assert(VocabStatus && "Failed to initialize IR2Vec vocabulary"); (void)VocabStatus; _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
