https://github.com/ziqingluo-90 updated 
https://github.com/llvm/llvm-project/pull/92031

>From ac5aeb5c3a134d085320fc7fc5cf3f2c8c41a1f1 Mon Sep 17 00:00:00 2001
From: ziqingluo-90 <ziqing_...@apple.com>
Date: Mon, 13 May 2024 13:31:21 -0700
Subject: [PATCH 1/3] fix safe buffer opt-out region serialization

---
 clang/include/clang/Lex/Preprocessor.h        |  22 +++-
 .../include/clang/Serialization/ASTBitCodes.h |   3 +
 clang/lib/Lex/Preprocessor.cpp                | 106 ++++++++++++++----
 clang/lib/Serialization/ASTReader.cpp         |  11 ++
 clang/lib/Serialization/ASTWriter.cpp         |   7 ++
 clang/test/Modules/Inputs/SafeBuffers/base.h  |   9 ++
 .../SafeBuffers/safe_buffers_test.modulemap   |  10 ++
 .../Modules/Inputs/SafeBuffers/test_sub1.h    |  20 ++++
 .../Modules/Inputs/SafeBuffers/test_sub2.h    |  11 ++
 clang/test/Modules/safe_buffers_optout.cpp    |  39 +++++++
 ...unsafe-buffer-usage-pragma-pch-complex.cpp |  72 ++++++++++++
 .../warn-unsafe-buffer-usage-pragma-pch.cpp   |  27 +++++
 12 files changed, 314 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/base.h
 create mode 100644 
clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
 create mode 100644 clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
 create mode 100644 clang/test/Modules/safe_buffers_optout.cpp
 create mode 100644 
clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index e89b4a2c5230e..8d6884ebe7597 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2883,11 +2883,15 @@ class Preprocessor {
   /// otherwise.
   SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the 
start location of an never-closed region.
 
-  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
-  // translation unit. Each region is represented by a pair of start and end
-  // locations.  A region is "open" if its' start and end locations are
+  using SafeBufferOptOutMapTy =
+      SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
+  // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
+  // translation unit. Each region is represented by a pair of start and
+  // end locations.  A region is "open" if its' start and end locations are
   // identical.
-  SmallVector<std::pair<SourceLocation, SourceLocation>, 8> 
SafeBufferOptOutMap;
+  SafeBufferOptOutMapTy SafeBufferOptOutMap;
+  // `SafeBufferOptOutMap`s of loaded files:
+  llvm::DenseMap<FileID, SafeBufferOptOutMapTy> LoadedSafeBufferOptOutMap;
 
 public:
   /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2922,16 @@ class Preprocessor {
   ///          opt-out region
   bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
 
+  /// \return a sequence of SourceLocations representing ordered opt-out 
regions
+  /// specified by
+  /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
+  SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
+
+  /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
+  /// record of code `PP_UNSAFE_BUFFER_USAGE`.
+  void setDeserializedSafeBufferOptOutMap(
+      const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
+
 private:
   /// Helper functions to forward lexing to the actual lexer. They all share 
the
   /// same signature.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h 
b/clang/include/clang/Serialization/ASTBitCodes.h
index dcfa4ac0c1967..d1a0eba943039 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -775,6 +775,9 @@ enum ASTRecordTypes {
   /// Record code for lexical and visible block for delayed namespace in
   /// reduced BMI.
   DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
+
+  /// Record code for \#pragma clang unsafe_buffer_usage begin/end
+  PP_UNSAFE_BUFFER_USAGE = 69,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 0b70192743a39..6a41e3d4138aa 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -58,6 +58,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,41 @@ void Preprocessor::emitFinalMacroWarning(const Token 
&Identifier,
 }
 
 bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
-                                           const SourceLocation &Loc) const {
-  // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
-  auto FirstRegionEndingAfterLoc = llvm::partition_point(
-      SafeBufferOptOutMap,
-      [&SourceMgr,
-       &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
-        return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
-      });
+                                      const SourceLocation &Loc) const {
+  // The lambda that tests if a `Loc` is in an opt-out region given one opt-out
+  // region map:
+  auto TestInMap = [&SourceMgr](const SafeBufferOptOutMapTy &Map,
+                                const SourceLocation &Loc) -> bool {
+    // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
+    auto FirstRegionEndingAfterLoc = llvm::partition_point(
+        Map, [&SourceMgr,
+              &Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
+          return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
+        });
+
+    if (FirstRegionEndingAfterLoc != Map.end()) {
+      // To test if the start location of the found region precedes `Loc`:
+      return SourceMgr.isBeforeInTranslationUnit(
+          FirstRegionEndingAfterLoc->first, Loc);
+    }
+    // If we do not find a region whose end location passes `Loc`, we want to
+    // check if the current region is still open:
+    if (!Map.empty() && Map.back().first == Map.back().second)
+      return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
+    return false;
+  };
 
-  if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
-    // To test if the start location of the found region precedes `Loc`:
-    return 
SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first,
-                                               Loc);
-  }
-  // If we do not find a region whose end location passes `Loc`, we want to
-  // check if the current region is still open:
-  if (!SafeBufferOptOutMap.empty() &&
-      SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
-    return 
SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first,
-                                               Loc);
+  if (SourceMgr.isLocalSourceLocation(Loc))
+    return TestInMap(SafeBufferOptOutMap, Loc);
+
+  // Expand macros (if any) until it gets to a file location:
+  SourceLocation FLoc = SourceMgr.getFileLoc(Loc);
+  FileID FlocFID = SourceMgr.getDecomposedLoc(FLoc).first;
+  auto LoadedMap = LoadedSafeBufferOptOutMap.find(FlocFID);
+
+  if (LoadedMap != LoadedSafeBufferOptOutMap.end())
+    return TestInMap(LoadedMap->getSecond(), Loc);
+  // FIXME: think out if this is unreachable?
   return false;
 }
 
@@ -1551,6 +1567,58 @@ bool 
Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
   return InSafeBufferOptOutRegion;
 }
 
+SmallVector<SourceLocation, 64>
+Preprocessor::serializeSafeBufferOptOutMap() const {
+  assert(!InSafeBufferOptOutRegion &&
+         "Attempt to serialize safe buffer opt-out regions before file being "
+         "completely preprocessed");
+
+  SmallVector<SourceLocation, 64> SrcSeq;
+
+  for (const auto &[begin, end] : SafeBufferOptOutMap) {
+    SrcSeq.push_back(begin);
+    SrcSeq.push_back(end);
+  }
+  // Only `SafeBufferOptOutMap` gets serialized. No need to serialize
+  // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
+  // pch/module in the pch-chain/module-DAG will be loaded one by one in order.
+  // It means that for each loading pch/module m, it just needs to load m's own
+  // `SafeBufferOptOutMap`.
+  return SrcSeq;
+}
+
+void Preprocessor::setDeserializedSafeBufferOptOutMap(
+    const SmallVectorImpl<SourceLocation> &SourceLocations) {
+  auto It = SourceLocations.begin();
+
+  assert(SourceLocations.size() % 2 == 0 &&
+         "ill-formed SourceLocation sequence");
+  while (It != SourceLocations.end()) {
+    SourceLocation begin = *It++;
+    SourceLocation end = *It++;
+    SourceLocation FileLoc = SourceMgr.getFileLoc(begin);
+    FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first;
+
+    if (FID.isInvalid()) {
+      // I suppose this should not happen:
+      assert(false && "Attempted to read a safe buffer opt-out region whose "
+                      "begin location is associated to an invalid File ID.");
+      break;
+    }
+    assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file");
+    // Here we assume that
+    // `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`.
+    // Though it may not hold in very rare and strange cases, i.e., a pair of
+    // opt-out pragams written in different files but are always used in a way
+    // that they match in a TU (otherwise PP will complaint).
+    // FIXME: PP should complaint about cross file safe buffer opt-out pragmas.
+    assert(FID == SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first 
&&
+           "Maybe a safe buffer opt-out region starts and ends at different "
+           "files.");
+    LoadedSafeBufferOptOutMap[FID].emplace_back(begin, end);
+  }
+}
+
 ModuleLoader::~ModuleLoader() = default;
 
 CommentHandler::~CommentHandler() = default;
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 43b69045bb054..4e22b3e1187c0 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3586,6 +3586,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
+    case PP_UNSAFE_BUFFER_USAGE: {
+      if (!Record.empty()) {
+        SmallVector<SourceLocation, 64> SrcLocs;
+        unsigned Idx = 0;
+        while (Idx < Record.size())
+          SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
+        PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
+      }
+      break;
+    }
+
     case PP_CONDITIONAL_STACK:
       if (!Record.empty()) {
         unsigned Idx = 0, End = Record.size() - 1;
diff --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 30195868ca996..3203173640d18 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -911,6 +911,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
   RECORD(PP_ASSUME_NONNULL_LOC);
+  RECORD(PP_UNSAFE_BUFFER_USAGE);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -2439,6 +2440,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor 
&PP, bool IsModule) {
     Record.clear();
   }
 
+  // Write the safe buffer opt-out region map in PP
+  for (SourceLocation &S : PP.serializeSafeBufferOptOutMap())
+    AddSourceLocation(std::move(S), Record);
+  Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record);
+  Record.clear();
+
   // Enter the preprocessor block.
   Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);
 
diff --git a/clang/test/Modules/Inputs/SafeBuffers/base.h 
b/clang/test/Modules/Inputs/SafeBuffers/base.h
new file mode 100644
index 0000000000000..660df17998a48
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/base.h
@@ -0,0 +1,9 @@
+#ifdef __cplusplus
+int base(int *p) {
+  int x = p[5];
+#pragma clang unsafe_buffer_usage begin
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap 
b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
new file mode 100644
index 0000000000000..a4826c6514b98
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/safe_buffers_test.modulemap
@@ -0,0 +1,10 @@
+module safe_buffers_test_base {
+  header "base.h"
+}
+
+module safe_buffers_test_optout {
+  explicit module test_sub1 {  header "test_sub1.h" }
+  explicit module test_sub2 {  textual header "test_sub2.h" }
+  use safe_buffers_test_base
+}
+
diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h 
b/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
new file mode 100644
index 0000000000000..c2b105c6e748a
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/test_sub1.h
@@ -0,0 +1,20 @@
+#include "base.h"
+
+#ifdef __cplusplus
+int sub1(int *p) {
+  int x = p[5];
+#pragma clang unsafe_buffer_usage begin  
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+
+template <typename T>
+T sub1_T(T *p) {
+  T x = p[5];
+#pragma clang unsafe_buffer_usage begin  
+  T y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h 
b/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
new file mode 100644
index 0000000000000..11eb9591ab600
--- /dev/null
+++ b/clang/test/Modules/Inputs/SafeBuffers/test_sub2.h
@@ -0,0 +1,11 @@
+#include "base.h"
+
+#ifdef __cplusplus
+int sub2(int *p) {  
+  int x = p[5];
+#pragma clang unsafe_buffer_usage begin
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  return x + y;
+}
+#endif
diff --git a/clang/test/Modules/safe_buffers_optout.cpp 
b/clang/test/Modules/safe_buffers_optout.cpp
new file mode 100644
index 0000000000000..dd8b1cca5ec89
--- /dev/null
+++ b/clang/test/Modules/safe_buffers_optout.cpp
@@ -0,0 +1,39 @@
+// RUN: rm -rf %t
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodule-name=safe_buffers_test_base -x c++ 
%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN:     -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodule-name=safe_buffers_test_optout -x c++ 
%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20\
+// RUN:     -o %t/safe_buffers_test_optout.pcm 
-fmodule-file=%t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\
+// RUN:     -std=c++20 -Wunsafe-buffer-usage
+
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodules-cache-path=%t -fmodule-name=safe_buffers_test_base -x c++\
+// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 
-Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodules-cache-path=%t -fmodule-name=safe_buffers_test_optout -x c++\
+// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 
-Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodules-cache-path=%t -I %S/Inputs/SafeBuffers %s -x c++\
+// RUN:     -std=c++20 -Wunsafe-buffer-usage
+
+#include "test_sub1.h"
+#include "test_sub2.h"
+
+// Testing safe buffers opt-out region serialization with modules: this
+// file loads 2 submodules from top-level module
+// `safe_buffers_test_optout`, which uses another top-level module
+// `safe_buffers_test_base`. (So the module dependencies form a DAG.)
+
+// expected-warning@base.h:3{{unsafe buffer access}}
+// expected-note@base.h:3{{pass -fsafe-buffer-usage-suggestions to receive 
code hardening suggestions}}
+// expected-warning@test_sub1.h:5{{unsafe buffer access}}
+// expected-note@test_sub1.h:5{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+// expected-warning@test_sub1.h:14{{unsafe buffer access}}
+// expected-note@test_sub1.h:14{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+// expected-warning@test_sub2.h:5{{unsafe buffer access}}
+// expected-note@test_sub2.h:5{{pass -fsafe-buffer-usage-suggestions to 
receive code hardening suggestions}}
+int foo(int * p) {
+  int x = p[5]; // expected-warning{{unsafe buffer access}} 
expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening 
suggestions}}
+#pragma clang unsafe_buffer_usage begin
+  int y = p[5];
+#pragma clang unsafe_buffer_usage end
+  sub1_T(p); // instantiate template
+  return base(p) + sub1(p) + sub2(p);
+}
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
new file mode 100644
index 0000000000000..04ac77b3f342e
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch-complex.cpp
@@ -0,0 +1,72 @@
+// A more complex example than warn-unsafe-buffer-usage-pragma-pch.cpp:
+//   MAIN - includes INC_H_1 
+//        \ loads    PCH_H_1 - includes INC_H_2
+//                           \ loads    PCH_H_2   
+
+// Test with PCH
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t-1 -DPCH_H_2 %s
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -include-pch %t-1 -emit-pch -o 
%t-2 -DPCH_H_1 %s
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 
-include-pch %t-2 -DMAIN -verify %s
+
+#define UNSAFE_BEGIN _Pragma("clang unsafe_buffer_usage begin")
+#define UNSAFE_END   _Pragma("clang unsafe_buffer_usage end")
+
+
+#ifdef INC_H_1
+#undef INC_H_1
+int a(int *s) {
+  s[2];  // <- expected warning here
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+#endif
+
+#ifdef INC_H_2
+#undef INC_H_2
+int b(int *s) {
+  s[2];  // <- expected warning here  
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+#endif
+
+#ifdef PCH_H_1
+#undef PCH_H_1
+#define INC_H_2
+#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp"
+
+int c(int *s) {
+  s[2];  // <- expected warning here  
+UNSAFE_BEGIN
+  return s[1];
+UNSAFE_END
+}
+#endif
+
+#ifdef PCH_H_2
+#undef PCH_H_2
+int d(int *s) {
+  s[2];  // <- expected warning here  
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+#endif
+
+#ifdef MAIN
+#undef MAIN
+#define INC_H_1
+#include "warn-unsafe-buffer-usage-pragma-pch-complex.cpp"
+
+// expected-warning@-45{{unsafe buffer access}} expected-note@-45{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning@-36{{unsafe buffer access}} expected-note@-36{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning@-24{{unsafe buffer access}} expected-note@-24{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+// expected-warning@-15{{unsafe buffer access}} expected-note@-15{{pass 
-fsafe-buffer-usage-suggestions to receive code hardening suggestions}}
+int main() {
+  int s[] = {1, 2, 3};
+  return a(s) + b(s) + c(s) + d(s);
+}
+#undef MAIN
+#endif
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp 
b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp
new file mode 100644
index 0000000000000..abd3f0ffe9565
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp
@@ -0,0 +1,27 @@
+// The original example from https://github.com/llvm/llvm-project/issues/90501
+
+// Test without PCH
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include 
%s -verify %s
+// Test with PCH
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s
+// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 
-include-pch %t -verify %s
+
+#ifndef A_H
+#define A_H
+
+int a(int *s) {
+  s[2];  // <- expected warning here
+#pragma clang unsafe_buffer_usage begin
+  return s[1];
+#pragma clang unsafe_buffer_usage end
+}
+
+#else
+// expected-warning@-7{{unsafe buffer access}}
+// expected-note@-8{{pass -fsafe-buffer-usage-suggestions to receive code 
hardening suggestions}}
+int main() {
+  int s[] = {1, 2, 3};
+  return a(s);
+}
+
+#endif

>From 23050bcd41a6ebf7dce02be233e0a2d687f3f98d Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziq...@udel.edu>
Date: Wed, 15 May 2024 14:31:37 -0700
Subject: [PATCH 2/3] fix the test command for lazily loading modules

---
 clang/test/Modules/safe_buffers_optout.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/clang/test/Modules/safe_buffers_optout.cpp 
b/clang/test/Modules/safe_buffers_optout.cpp
index dd8b1cca5ec89..09f42f7683e2b 100644
--- a/clang/test/Modules/safe_buffers_optout.cpp
+++ b/clang/test/Modules/safe_buffers_optout.cpp
@@ -6,12 +6,8 @@
 // RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodule-file=%t/safe_buffers_test_optout.pcm -I %S/Inputs/SafeBuffers %s\
 // RUN:     -std=c++20 -Wunsafe-buffer-usage
 
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodules-cache-path=%t -fmodule-name=safe_buffers_test_base -x c++\
-// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 
-Wunsafe-buffer-usage
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module 
-fmodules-cache-path=%t -fmodule-name=safe_buffers_test_optout -x c++\
-// RUN:     %S/Inputs/SafeBuffers/safe_buffers_test.modulemap -std=c++20 
-Wunsafe-buffer-usage
-// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodules-cache-path=%t -I %S/Inputs/SafeBuffers %s -x c++\
-// RUN:     -std=c++20 -Wunsafe-buffer-usage
+// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify 
-fmodules-cache-path=%t 
-fmodule-map-file=%S/Inputs/SafeBuffers/safe_buffers_test.modulemap -I 
%S/Inputs/SafeBuffers %s\
+// RUN:     -x c++ -std=c++20 -Wunsafe-buffer-usage
 
 #include "test_sub1.h"
 #include "test_sub2.h"

>From 64b1f95cc535f284e993c395d6d258ac8d83cfe4 Mon Sep 17 00:00:00 2001
From: Ziqing Luo <ziq...@udel.edu>
Date: Thu, 16 May 2024 15:37:11 -0700
Subject: [PATCH 3/3] De-serialize an unwelcomed opt-out region is an error

Throw an error if clang deserializes a safe buffer opt-out region that
begins and ends at different files.
---
 clang/include/clang/Lex/Preprocessor.h        |  5 +++-
 clang/lib/Lex/Preprocessor.cpp                | 27 ++++++++++---------
 clang/lib/Serialization/ASTReader.cpp         |  4 ++-
 .../unsafe-buffer-usage-pragma-pch-bad.cpp    | 25 +++++++++++++++++
 4 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index 8d6884ebe7597..e49a7c5847684 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -2929,7 +2929,10 @@ class Preprocessor {
 
   /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
   /// record of code `PP_UNSAFE_BUFFER_USAGE`.
-  void setDeserializedSafeBufferOptOutMap(
+  /// \return `std::nullopt` iff deserialization succeeds and this PP has been
+  /// properly set; Otherwise, a StringRef carrying the message of why
+  /// serialization fails with this PP being untouched.
+  std::optional<StringRef> setDeserializedSafeBufferOptOutMap(
       const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
 
 private:
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 6a41e3d4138aa..42e314e25854a 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1587,9 +1587,11 @@ Preprocessor::serializeSafeBufferOptOutMap() const {
   return SrcSeq;
 }
 
-void Preprocessor::setDeserializedSafeBufferOptOutMap(
+std::optional<StringRef> Preprocessor::setDeserializedSafeBufferOptOutMap(
     const SmallVectorImpl<SourceLocation> &SourceLocations) {
   auto It = SourceLocations.begin();
+  decltype(LoadedSafeBufferOptOutMap)
+      TmpMap; // temporarily hold data for `LoadedSafeBufferOptOutMap`
 
   assert(SourceLocations.size() % 2 == 0 &&
          "ill-formed SourceLocation sequence");
@@ -1599,24 +1601,25 @@ void Preprocessor::setDeserializedSafeBufferOptOutMap(
     SourceLocation FileLoc = SourceMgr.getFileLoc(begin);
     FileID FID = SourceMgr.getDecomposedLoc(FileLoc).first;
 
-    if (FID.isInvalid()) {
-      // I suppose this should not happen:
-      assert(false && "Attempted to read a safe buffer opt-out region whose "
-                      "begin location is associated to an invalid File ID.");
-      break;
-    }
+    assert(!FID.isInvalid() &&
+           "Attempted to read a safe buffer opt-out region whose "
+           "begin location is associated to an invalid File ID.");
     assert(!SourceMgr.isLocalFileID(FID) && "Expected a pch/module file");
-    // Here we assume that
+    // Here we require that
     // `SourceMgr.getFileLoc(begin) == SourceMgr.getFileLoc(end)`.
     // Though it may not hold in very rare and strange cases, i.e., a pair of
     // opt-out pragams written in different files but are always used in a way
     // that they match in a TU (otherwise PP will complaint).
     // FIXME: PP should complaint about cross file safe buffer opt-out pragmas.
-    assert(FID == SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first 
&&
-           "Maybe a safe buffer opt-out region starts and ends at different "
-           "files.");
-    LoadedSafeBufferOptOutMap[FID].emplace_back(begin, end);
+    if (FID != SourceMgr.getDecomposedLoc(SourceMgr.getFileLoc(end)).first) {
+      return "Cannot de-serialize a safe buffer opt-out region that begins and 
"
+             "ends at different files";
+    }
+    TmpMap[FID].emplace_back(begin, end);
   }
+  for (auto [K, V] : TmpMap)
+    LoadedSafeBufferOptOutMap[K].append(V);
+  return std::nullopt;
 }
 
 ModuleLoader::~ModuleLoader() = default;
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 4e22b3e1187c0..f1c3fe0fe456a 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -3592,7 +3592,9 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
         unsigned Idx = 0;
         while (Idx < Record.size())
           SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
-        PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
+        if (auto ErrMsg = PP.setDeserializedSafeBufferOptOutMap(SrcLocs))
+          return llvm::createStringError(std::errc::invalid_argument,
+                                         ErrMsg->data());
       }
       break;
     }
diff --git a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp 
b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp
new file mode 100644
index 0000000000000..7713fb47cc55f
--- /dev/null
+++ b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-bad.cpp
@@ -0,0 +1,25 @@
+// Test that a serialized safe buffer opt-out region begins and ends at 
different files.
+// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s -include %s
+// RUN: not %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 
-include-pch %t %s 2>&1 | FileCheck %s
+
+// CHECK: fatal error:{{.*}}'Cannot de-serialize a safe buffer opt-out region 
that begins and ends at different files'
+#ifndef A_H
+#define A_H
+
+int a() {
+#pragma clang unsafe_buffer_usage begin
+  return 0;
+}
+
+#elif (defined(A_H) && !defined(B_H))
+#define B_H
+
+#pragma clang unsafe_buffer_usage end
+
+#else
+
+int main() {
+  return a();
+}
+
+#endif

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to