melver created this revision.
melver added a reviewer: dvyukov.
Herald added subscribers: Enna1, pengfei, hiraditya.
Herald added a project: All.
melver requested review of this revision.
Herald added projects: clang, Sanitizers, LLVM.
Herald added subscribers: llvm-commits, Sanitizers, cfe-commits.

Emit all constant integers produced by SanitizerBinaryMetadata as
ULEB128 to further reduce binary space used. Increasing the version is
not necessary given this change depends on (and will land) along with
the bump to v2.

To support this, the !pcsections metadata format is extended to allow
for per-section options, encoded in the first MD operator which must
always be a string and contain the section: "<section>!<options>".

Depends on D143482 <https://reviews.llvm.org/D143482>


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D143484

Files:
  clang/test/CodeGen/sanitize-metadata.c
  compiler-rt/test/metadata/common.h
  llvm/docs/PCSectionsMetadata.rst
  llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
  llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
  llvm/test/CodeGen/X86/pcsections.ll
  llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll

Index: llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
===================================================================
--- llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
+++ llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
@@ -2071,6 +2071,6 @@
 ; CHECK-DAG: ret:
 ; CHECK-NEXT:  ret void
 
-; CHECK: !0 = !{!"sanmd_covered", !1}
-; CHECK: !1 = !{i8 1}
-; CHECK: !2 = !{!"sanmd_atomics"}
+; CHECK: !0 = !{!"sanmd_covered!C", !1}
+; CHECK: !1 = !{i64 1}
+; CHECK: !2 = !{!"sanmd_atomics!C"}
Index: llvm/test/CodeGen/X86/pcsections.ll
===================================================================
--- llvm/test/CodeGen/X86/pcsections.ll
+++ llvm/test/CodeGen/X86/pcsections.ll
@@ -137,9 +137,32 @@
   ret void
 }
 
+define void @multiple_uleb128() !pcsections !6 {
+; CHECK-LABEL: multiple_uleb128:
+; CHECK:       .section	section_aux,"awo",@progbits,.text
+; CHECK-NEXT:  .Lpcsection_base8:
+; DEFCM-NEXT:  .long	.Lfunc_begin3-.Lpcsection_base8
+; LARGE-NEXT:  .quad	.Lfunc_begin3-.Lpcsection_base8
+; CHECK-NEXT:  .uleb128	.Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT:  .byte	42
+; CHECK-NEXT:  .ascii	"\345\216&"
+; CHECK-NEXT:  .byte	255
+; CHECK-NEXT:  .section	section_aux_21264,"awo",@progbits,.text
+; CHECK-NEXT:  .Lpcsection_base9:
+; DEFCM-NEXT:  .long	.Lfunc_begin3-.Lpcsection_base9
+; LARGE-NEXT:  .quad	.Lfunc_begin3-.Lpcsection_base9
+; CHECK-NEXT:  .long	.Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT:  .long	21264
+; CHECK-NEXT:  .text
+entry:
+  ret void
+}
+
 !0 = !{!"section_no_aux"}
 !1 = !{!"section_aux", !3}
 !2 = !{!"section_aux_42", !4, !"section_aux_21264", !5}
 !3 = !{i32 10, i32 20, i32 30}
 !4 = !{i32 42}
 !5 = !{i32 21264}
+!6 = !{!"section_aux!C", !7, !"section_aux_21264", !5}
+!7 = !{i64 42, i32 624485, i8 255}
Index: llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -36,14 +36,15 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 #include <array>
 #include <cstdint>
-#include <limits>
 
 using namespace llvm;
 
@@ -148,7 +149,7 @@
   // to determine if a memory operation is atomic or not in modules compiled
   // with SanitizerBinaryMetadata.
   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
-             uint32_t &FeatureMask);
+             uint64_t &FeatureMask);
 
   // Get start/end section marker pointer.
   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
@@ -169,6 +170,8 @@
   const SanitizerBinaryMetadataOptions Options;
   const Triple TargetTriple;
   IRBuilder<> IRB;
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver StringPool{Alloc};
 };
 
 bool SanitizerBinaryMetadata::run() {
@@ -242,7 +245,7 @@
 
   // The metadata features enabled for this function, stored along covered
   // metadata (if enabled).
-  uint32_t FeatureMask = 0;
+  uint64_t FeatureMask = 0;
   // Don't emit unnecessary covered metadata for all functions to save space.
   bool RequiresCovered = false;
 
@@ -267,10 +270,8 @@
     const auto *MI = &MetadataInfo::Covered;
     MIS.insert(MI);
     const StringRef Section = getSectionName(MI->SectionSuffix);
-    // The feature mask will be placed after the size of the function.
-    assert(FeatureMask <= std::numeric_limits<uint8_t>::max() &&
-           "Increase feature mask bytes and bump version");
-    Constant *CFM = IRB.getInt8(FeatureMask);
+    // The feature mask will be placed after the function size.
+    Constant *CFM = IRB.getInt64(FeatureMask);
     F.setMetadata(LLVMContext::MD_pcsections,
                   MDB.createPCSections({{Section, {CFM}}}));
   }
@@ -377,7 +378,7 @@
 }
 
 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
-                                    MDBuilder &MDB, uint32_t &FeatureMask) {
+                                    MDBuilder &MDB, uint64_t &FeatureMask) {
   SmallVector<const MetadataInfo *, 1> InstMetadata;
   bool RequiresCovered = false;
 
@@ -432,8 +433,9 @@
 }
 
 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
-  // FIXME: Other TargetTriple (req. string pool)
-  return SectionSuffix;
+  // FIXME: Other TargetTriples.
+  // Request ULEB128 encoding for all integer constants.
+  return StringPool.save(SectionSuffix + "!C");
 }
 
 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
Index: llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
===================================================================
--- llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,7 +52,7 @@
   if (!MD)
     return false;
   const auto &Section = *cast<MDString>(MD->getOperand(0));
-  if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+  if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
     return false;
   auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
   // Assume it currently only has features.
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1496,9 +1496,22 @@
     // constants may appear, which will simply be emitted into the current
     // section (the user of MD_pcsections decides the format of encoded data).
     assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+    bool ConstULEB128 = false;
     for (const MDOperand &MDO : MD.operands()) {
       if (auto *S = dyn_cast<MDString>(MDO)) {
-        SwitchSection(S->getString());
+        // Found string, start of new section!
+        // Find options for this section "<section>!<opts>" - supported options:
+        //   C = Compress constant integers of size 2-8 bytes as ULEB128.
+        const StringRef SecWithOpt = S->getString();
+        const size_t OptStart = SecWithOpt.find('!'); // likely npos
+        const StringRef Sec = SecWithOpt.substr(0, OptStart);
+        const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+        ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+        for (char O : Opts)
+          assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+        SwitchSection(Sec);
         const MCSymbol *Prev = Syms.front();
         for (const MCSymbol *Sym : Syms) {
           if (Sym == Prev || !Deltas) {
@@ -1510,17 +1523,30 @@
             // `base + addr`.
             emitLabelDifference(Sym, Base, RelativeRelocSize);
           } else {
-            emitLabelDifference(Sym, Prev, 4);
+            // Emit delta between symbol and previous symbol.
+            if (ConstULEB128)
+              OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Sym, Prev);
+            else
+              emitLabelDifference(Sym, Prev, 4);
           }
           Prev = Sym;
         }
       } else {
+        // Emit auxiliary data after PC.
         assert(isa<MDNode>(MDO) && "expecting either string or tuple");
         const auto *AuxMDs = cast<MDNode>(MDO);
         for (const MDOperand &AuxMDO : AuxMDs->operands()) {
           assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
-          const auto *C = cast<ConstantAsMetadata>(AuxMDO);
-          emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+          const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+          const DataLayout &DL = F.getParent()->getDataLayout();
+          const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+          if (auto *CI = dyn_cast<ConstantInt>(C);
+              CI && ConstULEB128 && Size > 1 && Size <= 8) {
+            OutStreamer->emitULEB128IntValue(CI->getZExtValue());
+          } else {
+            emitGlobalConstant(DL, C);
+          }
         }
       }
     }
Index: llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
===================================================================
--- llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
+++ llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -30,11 +30,11 @@
 inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
 inline constexpr int kSanitizerBinaryMetadataUARHasSizeBit = 2;
 
-inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
+inline constexpr uint64_t kSanitizerBinaryMetadataAtomics =
     1 << kSanitizerBinaryMetadataAtomicsBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUAR =
+inline constexpr uint64_t kSanitizerBinaryMetadataUAR =
     1 << kSanitizerBinaryMetadataUARBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUARHasSize =
+inline constexpr uint64_t kSanitizerBinaryMetadataUARHasSize =
     1 << kSanitizerBinaryMetadataUARHasSizeBit;
 
 inline constexpr char kSanitizerBinaryMetadataCoveredSection[] =
Index: llvm/docs/PCSectionsMetadata.rst
===================================================================
--- llvm/docs/PCSectionsMetadata.rst
+++ llvm/docs/PCSectionsMetadata.rst
@@ -59,6 +59,18 @@
 code models, the entry size matches pointer size. For any smaller code model
 the entry size is just 32 bits.
 
+Encoding Options
+----------------
+
+Optional encoding options can be passed in the first ``MDString`` operator:
+``<section>!<options>``. The following options are available:
+
+    * ``C`` -- Compress constant integers of size 2-8 bytes as ULEB128; this
+      includes the function size (but excludes the PC entry).
+
+For example, ``foo!C`` will emit into section ``foo`` with all constants
+encoded as ULEB128.
+
 Guarantees on Code Generation
 =============================
 
Index: compiler-rt/test/metadata/common.h
===================================================================
--- compiler-rt/test/metadata/common.h
+++ compiler-rt/test/metadata/common.h
@@ -25,6 +25,20 @@
   return v;
 }
 
+uint64_t consume_uleb128(const char *&pos, const char *end) {
+  uint64_t val = 0;
+  int shift = 0;
+  uint8_t cur;
+  do {
+    cur = *pos++;
+    val |= uint64_t{cur & 0x7fu} << shift;
+    shift += 7;
+  } while (cur & 0x80);
+  assert(shift < 64);
+  assert(pos <= end);
+  return val;
+}
+
 constexpr uint32_t kSanitizerBinaryMetadataUARHasSize = 1 << 2;
 
 uint32_t meta_version;
@@ -45,13 +59,13 @@
     const auto base = reinterpret_cast<uintptr_t>(pos);
     const intptr_t offset = offset_ptr_sized ? consume<intptr_t>(pos, end)
                                              : consume<int32_t>(pos, end);
-    [[maybe_unused]] const uint32_t size = consume<uint32_t>(pos, end);
-    const uint32_t features = consume<uint8_t>(pos, end);
-    uint32_t stack_args = 0;
+    [[maybe_unused]] const uint64_t size = consume_uleb128(pos, end);
+    const uint64_t features = consume_uleb128(pos, end);
+    uint64_t stack_args = 0;
     if (features & kSanitizerBinaryMetadataUARHasSize)
-      stack_args = consume<uint32_t>(pos, end);
+      stack_args = consume_uleb128(pos, end);
     if (const char *name = symbolize(base + offset))
-      printf("%s: features=%x stack_args=%u\n", name, features, stack_args);
+      printf("%s: features=%lx stack_args=%lu\n", name, features, stack_args);
   }
   meta_version = version;
   meta_start = start;
Index: clang/test/CodeGen/sanitize-metadata.c
===================================================================
--- clang/test/CodeGen/sanitize-metadata.c
+++ clang/test/CodeGen/sanitize-metadata.c
@@ -31,6 +31,6 @@
 // CHECK-LABEL: __sanitizer_metadata_covered.module_dtor
 // CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered)
 
-// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
-// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i8 1}
-// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics"}
+// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
+// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i64 1}
+// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics!C"}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to