This revision was automatically updated to reflect the committed changes.
Closed by commit rGbf9814b70560: [SanitizerBinaryMetadata] Emit constants as 
ULEB128 (authored by melver).

Changed prior to commit:
  https://reviews.llvm.org/D143484?vs=495442&id=495802#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143484/new/

https://reviews.llvm.org/D143484

Files:
  clang/test/CodeGen/sanitize-metadata.c
  compiler-rt/test/metadata/common.h
  llvm/docs/PCSectionsMetadata.rst
  llvm/include/llvm/CodeGen/AsmPrinter.h
  llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
  llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
  llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
  llvm/test/CodeGen/X86/pcsections.ll
  llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll

Index: llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
===================================================================
--- llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
+++ llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
@@ -2071,6 +2071,6 @@
 ; CHECK-DAG: ret:
 ; CHECK-NEXT:  ret void
 
-; CHECK: !0 = !{!"sanmd_covered", !1}
-; CHECK: !1 = !{i8 1}
-; CHECK: !2 = !{!"sanmd_atomics"}
+; CHECK: !0 = !{!"sanmd_covered!C", !1}
+; CHECK: !1 = !{i64 1}
+; CHECK: !2 = !{!"sanmd_atomics!C"}
Index: llvm/test/CodeGen/X86/pcsections.ll
===================================================================
--- llvm/test/CodeGen/X86/pcsections.ll
+++ llvm/test/CodeGen/X86/pcsections.ll
@@ -137,9 +137,32 @@
   ret void
 }
 
+define void @multiple_uleb128() !pcsections !6 {
+; CHECK-LABEL: multiple_uleb128:
+; CHECK:       .section	section_aux,"awo",@progbits,.text
+; CHECK-NEXT:  .Lpcsection_base8:
+; DEFCM-NEXT:  .long	.Lfunc_begin3-.Lpcsection_base8
+; LARGE-NEXT:  .quad	.Lfunc_begin3-.Lpcsection_base8
+; CHECK-NEXT:  .uleb128	.Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT:  .byte	42
+; CHECK-NEXT:  .ascii	"\345\216&"
+; CHECK-NEXT:  .byte	255
+; CHECK-NEXT:  .section	section_aux_21264,"awo",@progbits,.text
+; CHECK-NEXT:  .Lpcsection_base9:
+; DEFCM-NEXT:  .long	.Lfunc_begin3-.Lpcsection_base9
+; LARGE-NEXT:  .quad	.Lfunc_begin3-.Lpcsection_base9
+; CHECK-NEXT:  .long	.Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT:  .long	21264
+; CHECK-NEXT:  .text
+entry:
+  ret void
+}
+
 !0 = !{!"section_no_aux"}
 !1 = !{!"section_aux", !3}
 !2 = !{!"section_aux_42", !4, !"section_aux_21264", !5}
 !3 = !{i32 10, i32 20, i32 30}
 !4 = !{i32 42}
 !5 = !{i32 21264}
+!6 = !{!"section_aux!C", !7, !"section_aux_21264", !5}
+!7 = !{i64 42, i32 624485, i8 255}
Index: llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
===================================================================
--- llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -35,15 +35,16 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 #include <array>
 #include <cstdint>
-#include <limits>
 
 using namespace llvm;
 
@@ -148,7 +149,7 @@
   // to determine if a memory operation is atomic or not in modules compiled
   // with SanitizerBinaryMetadata.
   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
-             uint32_t &FeatureMask);
+             uint64_t &FeatureMask);
 
   // Get start/end section marker pointer.
   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
@@ -169,6 +170,8 @@
   const SanitizerBinaryMetadataOptions Options;
   const Triple TargetTriple;
   IRBuilder<> IRB;
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver StringPool{Alloc};
 };
 
 bool SanitizerBinaryMetadata::run() {
@@ -245,7 +248,7 @@
 
   // The metadata features enabled for this function, stored along covered
   // metadata (if enabled).
-  uint32_t FeatureMask = 0;
+  uint64_t FeatureMask = 0;
   // Don't emit unnecessary covered metadata for all functions to save space.
   bool RequiresCovered = false;
 
@@ -270,10 +273,8 @@
     const auto *MI = &MetadataInfo::Covered;
     MIS.insert(MI);
     const StringRef Section = getSectionName(MI->SectionSuffix);
-    // The feature mask will be placed after the size of the function.
-    assert(FeatureMask <= std::numeric_limits<uint8_t>::max() &&
-           "Increase feature mask bytes and bump version");
-    Constant *CFM = IRB.getInt8(FeatureMask);
+    // The feature mask will be placed after the function size.
+    Constant *CFM = IRB.getInt64(FeatureMask);
     F.setMetadata(LLVMContext::MD_pcsections,
                   MDB.createPCSections({{Section, {CFM}}}));
   }
@@ -380,7 +381,7 @@
 }
 
 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
-                                    MDBuilder &MDB, uint32_t &FeatureMask) {
+                                    MDBuilder &MDB, uint64_t &FeatureMask) {
   SmallVector<const MetadataInfo *, 1> InstMetadata;
   bool RequiresCovered = false;
 
@@ -435,8 +436,9 @@
 }
 
 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
-  // FIXME: Other TargetTriple (req. string pool)
-  return SectionSuffix;
+  // FIXME: Other TargetTriples.
+  // Request ULEB128 encoding for all integer constants.
+  return StringPool.save(SectionSuffix + "!C");
 }
 
 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
Index: llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
===================================================================
--- llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,7 +52,7 @@
   if (!MD)
     return false;
   const auto &Section = *cast<MDString>(MD->getOperand(0));
-  if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+  if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
     return false;
   auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
   // Assume it currently only has features.
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
===================================================================
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -32,28 +32,6 @@
 // Dwarf Emission Helper Routines
 //===----------------------------------------------------------------------===//
 
-/// EmitSLEB128 - emit the specified signed leb128 value.
-void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->emitSLEB128IntValue(Value);
-}
-
-void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
-                             unsigned PadTo) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->emitULEB128IntValue(Value, PadTo);
-}
-
-/// Emit something like ".uleb128 Hi-Lo".
-void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
-                                              const MCSymbol *Lo) const {
-  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
-}
-
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
   switch (Encoding) {
   case dwarf::DW_EH_PE_absptr:
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===================================================================
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1496,9 +1496,22 @@
     // constants may appear, which will simply be emitted into the current
     // section (the user of MD_pcsections decides the format of encoded data).
     assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+    bool ConstULEB128 = false;
     for (const MDOperand &MDO : MD.operands()) {
       if (auto *S = dyn_cast<MDString>(MDO)) {
-        SwitchSection(S->getString());
+        // Found string, start of new section!
+        // Find options for this section "<section>!<opts>" - supported options:
+        //   C = Compress constant integers of size 2-8 bytes as ULEB128.
+        const StringRef SecWithOpt = S->getString();
+        const size_t OptStart = SecWithOpt.find('!'); // likely npos
+        const StringRef Sec = SecWithOpt.substr(0, OptStart);
+        const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+        ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+        for (char O : Opts)
+          assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+        SwitchSection(Sec);
         const MCSymbol *Prev = Syms.front();
         for (const MCSymbol *Sym : Syms) {
           if (Sym == Prev || !Deltas) {
@@ -1510,17 +1523,30 @@
             // `base + addr`.
             emitLabelDifference(Sym, Base, RelativeRelocSize);
           } else {
-            emitLabelDifference(Sym, Prev, 4);
+            // Emit delta between symbol and previous symbol.
+            if (ConstULEB128)
+              emitLabelDifferenceAsULEB128(Sym, Prev);
+            else
+              emitLabelDifference(Sym, Prev, 4);
           }
           Prev = Sym;
         }
       } else {
+        // Emit auxiliary data after PC.
         assert(isa<MDNode>(MDO) && "expecting either string or tuple");
         const auto *AuxMDs = cast<MDNode>(MDO);
         for (const MDOperand &AuxMDO : AuxMDs->operands()) {
           assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
-          const auto *C = cast<ConstantAsMetadata>(AuxMDO);
-          emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+          const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+          const DataLayout &DL = F.getParent()->getDataLayout();
+          const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+          if (auto *CI = dyn_cast<ConstantInt>(C);
+              CI && ConstULEB128 && Size > 1 && Size <= 8) {
+            emitULEB128(CI->getZExtValue());
+          } else {
+            emitGlobalConstant(DL, C);
+          }
         }
       }
     }
@@ -2788,6 +2814,22 @@
 /// Emit a long directive and value.
 void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
 
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->emitSLEB128IntValue(Value);
+}
+
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->emitULEB128IntValue(Value, PadTo);
+}
+
 /// Emit a long long directive and value.
 void AsmPrinter::emitInt64(uint64_t Value) const {
   OutStreamer->emitInt64(Value);
@@ -2801,6 +2843,12 @@
   OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
 }
 
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+                                              const MCSymbol *Lo) const {
+  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
 /// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label.  This implicitly uses .set if it is available.
Index: llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
===================================================================
--- llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
+++ llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -30,11 +30,11 @@
 inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
 inline constexpr int kSanitizerBinaryMetadataUARHasSizeBit = 2;
 
-inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
+inline constexpr uint64_t kSanitizerBinaryMetadataAtomics =
     1 << kSanitizerBinaryMetadataAtomicsBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUAR =
+inline constexpr uint64_t kSanitizerBinaryMetadataUAR =
     1 << kSanitizerBinaryMetadataUARBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUARHasSize =
+inline constexpr uint64_t kSanitizerBinaryMetadataUARHasSize =
     1 << kSanitizerBinaryMetadataUARHasSizeBit;
 
 inline constexpr char kSanitizerBinaryMetadataCoveredSection[] =
Index: llvm/include/llvm/CodeGen/AsmPrinter.h
===================================================================
--- llvm/include/llvm/CodeGen/AsmPrinter.h
+++ llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -643,6 +643,13 @@
   /// Emit a long long directive and value.
   void emitInt64(uint64_t Value) const;
 
+  /// Emit the specified signed leb128 value.
+  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
+
+  /// Emit the specified unsigned leb128 value.
+  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
+                   unsigned PadTo = 0) const;
+
   /// Emit something like ".long Hi-Lo" where the size in bytes of the directive
   /// is specified by Size and Hi/Lo specify the labels.  This implicitly uses
   /// .set if it is available.
@@ -670,13 +677,6 @@
   // Dwarf Emission Helper Routines
   //===------------------------------------------------------------------===//
 
-  /// Emit the specified signed leb128 value.
-  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
-
-  /// Emit the specified unsigned leb128 value.
-  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
-                   unsigned PadTo = 0) const;
-
   /// Emit a .byte 42 directive that corresponds to an encoding.  If verbose
   /// assembly output is enabled, we output comments describing the encoding.
   /// Desc is a string saying what the encoding is specifying (e.g. "LSDA").
Index: llvm/docs/PCSectionsMetadata.rst
===================================================================
--- llvm/docs/PCSectionsMetadata.rst
+++ llvm/docs/PCSectionsMetadata.rst
@@ -59,6 +59,18 @@
 code models, the entry size matches pointer size. For any smaller code model
 the entry size is just 32 bits.
 
+Encoding Options
+----------------
+
+Optional encoding options can be passed in the first ``MDString`` operator:
+``<section>!<options>``. The following options are available:
+
+    * ``C`` -- Compress constant integers of size 2-8 bytes as ULEB128; this
+      includes the function size (but excludes the PC entry).
+
+For example, ``foo!C`` will emit into section ``foo`` with all constants
+encoded as ULEB128.
+
 Guarantees on Code Generation
 =============================
 
Index: compiler-rt/test/metadata/common.h
===================================================================
--- compiler-rt/test/metadata/common.h
+++ compiler-rt/test/metadata/common.h
@@ -25,6 +25,20 @@
   return v;
 }
 
+uint64_t consume_uleb128(const char *&pos, const char *end) {
+  uint64_t val = 0;
+  int shift = 0;
+  uint8_t cur;
+  do {
+    cur = *pos++;
+    val |= uint64_t{cur & 0x7fu} << shift;
+    shift += 7;
+  } while (cur & 0x80);
+  assert(shift < 64);
+  assert(pos <= end);
+  return val;
+}
+
 constexpr uint32_t kSanitizerBinaryMetadataUARHasSize = 1 << 2;
 
 uint32_t meta_version;
@@ -45,13 +59,13 @@
     const auto base = reinterpret_cast<uintptr_t>(pos);
     const intptr_t offset = offset_ptr_sized ? consume<intptr_t>(pos, end)
                                              : consume<int32_t>(pos, end);
-    [[maybe_unused]] const uint32_t size = consume<uint32_t>(pos, end);
-    const uint32_t features = consume<uint8_t>(pos, end);
-    uint32_t stack_args = 0;
+    [[maybe_unused]] const uint64_t size = consume_uleb128(pos, end);
+    const uint64_t features = consume_uleb128(pos, end);
+    uint64_t stack_args = 0;
     if (features & kSanitizerBinaryMetadataUARHasSize)
-      stack_args = consume<uint32_t>(pos, end);
+      stack_args = consume_uleb128(pos, end);
     if (const char *name = symbolize(base + offset))
-      printf("%s: features=%x stack_args=%u\n", name, features, stack_args);
+      printf("%s: features=%lx stack_args=%lu\n", name, features, stack_args);
   }
   meta_version = version;
   meta_start = start;
Index: clang/test/CodeGen/sanitize-metadata.c
===================================================================
--- clang/test/CodeGen/sanitize-metadata.c
+++ clang/test/CodeGen/sanitize-metadata.c
@@ -31,6 +31,6 @@
 // CHECK-LABEL: __sanitizer_metadata_covered.module_dtor
 // CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered)
 
-// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
-// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i8 1}
-// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics"}
+// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
+// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i64 1}
+// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics!C"}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to