================
@@ -0,0 +1,291 @@
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""IR2Vec Triplet Generator
+
+Generates IR2Vec triplets by applying random optimization levels to LLVM IR files
+and extracting triplets using llvm-ir2vec. Automatically generates preprocessed
+files: entity2id.txt, relation2id.txt, and train2id.txt.
+
+Usage:
+    python generateTriplets.py <llvm_build_dir> <num_optimizations> <ll_file_list> <output_dir>
+"""
+
+import argparse
+import logging
+import os
+import random
+import subprocess
+import sys
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from pathlib import Path
+from typing import List, Set, Tuple
+
+# Configuration
+OPT_LEVELS = ["O0", "O1", "O2", "O3", "Os", "Oz"]
+DEFAULT_MAX_WORKERS = 100
+
+logger = logging.getLogger(__name__)
+
+
+class TripletResult:
+    """Result from processing a single LLVM IR file"""
+
+    __slots__ = ["triplets", "max_relation"]
+
+    def __init__(self, triplets: Set[str], max_relation: int):
+        self.triplets = triplets
+        self.max_relation = max_relation
+
+
+class IR2VecTripletGenerator:
+    """Main class for generating IR2Vec triplets"""
+
+    def __init__(
+        self,
+        llvm_build_dir: Path,
+        num_optimizations: int,
+        output_dir: Path,
+        max_workers: int = DEFAULT_MAX_WORKERS,
+    ):
+        self.llvm_build_dir = llvm_build_dir
+        self.num_optimizations = num_optimizations
+        self.output_dir = output_dir
+        self.max_workers = max_workers
+
+        # Tool paths
+        self.opt_binary = os.path.join(llvm_build_dir, "bin", "opt")
+        self.ir2vec_binary = os.path.join(llvm_build_dir, "bin", "llvm-ir2vec")
+
+        self._validate_setup()
+
+    def _validate_setup(self):
+        """Validate that all required tools and paths exist"""
+        if not self.llvm_build_dir.exists():
+            raise FileNotFoundError(
+                f"LLVM build directory not found: {self.llvm_build_dir}"
+            )
+
+        if not os.path.isfile(self.opt_binary) or not os.access(
+            self.opt_binary, os.X_OK
+        ):
+            raise FileNotFoundError(
+                f"opt binary not found or not executable: {self.opt_binary}"
+            )
+
+        if not os.path.isfile(self.ir2vec_binary) or not os.access(
+            self.ir2vec_binary, os.X_OK
+        ):
+            raise FileNotFoundError(
+                f"llvm-ir2vec binary not found or not executable: {self.ir2vec_binary}"
+            )
+
+        if not (1 <= self.num_optimizations <= len(OPT_LEVELS)):
+            raise ValueError(
+                f"Number of optimizations must be between 1-{len(OPT_LEVELS)}"
+            )
+
+        self.output_dir.mkdir(parents=True, exist_ok=True)
----------------
boomanaiden154 wrote:

This `mkdir` call should probably live somewhere outside of `_validate_setup`.
It is slightly odd to have it in here, although I can see the motivation
(creating the directory validates, in a way, that the output path exists).

https://github.com/llvm/llvm-project/pull/149215
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to