================
@@ -0,0 +1,291 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""IR2Vec Triplet Generator
+
+Generates IR2Vec triplets by applying random optimization levels to LLVM IR files
+and extracting triplets using llvm-ir2vec. Automatically generates preprocessed
+files: entity2id.txt, relation2id.txt, and train2id.txt.
+
+Usage:
+    python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir>
+"""
+
+import argparse
+import logging
+import os
+import random
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import List, Set, Tuple
+
+# Configuration
+OPT_LEVELS = ["O0", "O1", "O2", "O3", "Os", "Oz"]
+DEFAULT_MAX_WORKERS = 100
+
+logger = logging.getLogger(__name__)
+
+
+class TripletResult:
+    """Result from processing a single LLVM IR file"""
+
+    __slots__ = ["triplets", "max_relation"]
+
+    def __init__(self, triplets: Set[str], max_relation: int):
+        self.triplets = triplets
+        self.max_relation = max_relation
+
+
+class IR2VecTripletGenerator:
+    """Main class for generating IR2Vec triplets"""
+
+    def __init__(
+        self,
+        llvm_build_dir: Path,
+        num_optimizations: int,
+        output_dir: Path,
+        max_workers: int = DEFAULT_MAX_WORKERS,
+    ):
+        self.llvm_build_dir = llvm_build_dir
+        self.num_optimizations = num_optimizations
+        self.output_dir = output_dir
+        self.max_workers = max_workers
+
+        # Tool paths
+        self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt")
+        self.ir2vec_binary = os.path.join(llvm_build_dir, "bin", "llvm-ir2vec")
+
+        self._validate_setup()
+
+    def _validate_setup(self):
+        """Validate that all required tools and paths exist"""
+        if not self.llvm_build_dir.exists():
+            raise FileNotFoundError(
+                f"LLVM build directory not found: {self.llvm_build_dir}"
+            )
+
+        if not os.path.isfile(self.opt_binary) or not os.access(
+            self.opt_binary, os.X_OK
+        ):
+            raise FileNotFoundError(
+                f"opt binary not found or not executable: {self.opt_binary}"
+            )
+
+        if not os.path.isfile(self.ir2vec_binary) or not os.access(
+            self.ir2vec_binary, os.X_OK
+        ):
+            raise FileNotFoundError(
+                f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}"
+            )
+
+        if not (1 <= self.num_optimizations <= len(OPT_LEVELS)):
+            raise ValueError(
+                f"Number of optimizations must be between 1-{len(OPT_LEVELS)}"
+            )
+
+        self.output_dir.mkdir(parents=True, exist_ok=True)
----------------
boomanaiden154 wrote:
This should probably be somewhere outside of `_validate_setup`? Slightly odd to be in here although I can see the motivation (validating that the output path exists in a way).

https://github.com/llvm/llvm-project/pull/149215
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits