Author: Chuanqi Xu
Date: 2024-06-21T17:50:30+08:00
New Revision: d4d95ee65159db1ea1a8c4159cfdaf8b81097897

URL: 
https://github.com/llvm/llvm-project/commit/d4d95ee65159db1ea1a8c4159cfdaf8b81097897
DIFF: 
https://github.com/llvm/llvm-project/commit/d4d95ee65159db1ea1a8c4159cfdaf8b81097897.diff

LOG: [Serialization] Register identifiers in ahead and don't emit predefined 
decls

See the added test for the motivation example. In that example, we add a
new function declaration in `a.cppm` and this is not used in the reduced
BMI of `b.cppm`. We expect that the change won't affect the BMI of
`b.cppm`. But it is the not the case.

There are 2 reason for unexpected result:
1. We would register the interesting identifiers in a pretty late phase.
   This may cause some some predefined identifier ID change due to we
   insert other identifiers during emitting decls and types.
2. In `GenerateNameLookup`, we would generate information for predefined
   decls. This may not be intended. Since every predefined decl doesn't
   belong to any module.

And this patch solves the first issue by registering the identifiers in
the very early posititon to make sure the ID won't get affected by the
process to emit decls and types. And we solve the second question by
filtering predefined decls simply.

Added: 
    clang/test/Modules/no-transitive-identifier-change-2.cppm

Modified: 
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTWriter.cpp
    clang/test/Modules/decl-params-determinisim.m

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index ce79c75dc2911..71a7c28047e31 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -228,6 +228,11 @@ class ASTWriter : public ASTDeserializationListener,
   /// unit, while 0 is reserved for NULL.
   llvm::DenseMap<const Decl *, LocalDeclID> DeclIDs;
 
+  /// Set of predefined decls. This is a helper data to determine if a decl
+  /// is predefined. It should be more clear and safer to query the set
+  /// instead of comparing the result of `getDeclID()` or `GetDeclRef()`.
+  llvm::SmallPtrSet<const Decl *, 32> PredefinedDecls;
+
   /// Offset of each declaration in the bitstream, indexed by
   /// the declaration's ID.
   std::vector<serialization::DeclOffset> DeclOffsets;
@@ -563,8 +568,6 @@ class ASTWriter : public ASTDeserializationListener,
   void WriteType(QualType T);
 
   bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC);
-  bool isLookupResultEntirelyExternalOrUnreachable(StoredDeclsList &Result,
-                                                   DeclContext *DC);
 
   void GenerateNameLookupTable(const DeclContext *DC,
                                llvm::SmallVectorImpl<char> &LookupTable);
@@ -844,6 +847,8 @@ class ASTWriter : public ASTDeserializationListener,
   bool hasChain() const { return Chain; }
   ASTReader *getChain() const { return Chain; }
 
+  bool isWritingModule() const { return WritingModule; }
+
   bool isWritingStdCXXNamedModules() const {
     return WritingModule && WritingModule->isNamedModule();
   }
@@ -852,6 +857,10 @@ class ASTWriter : public ASTDeserializationListener,
 
   bool getDoneWritingDeclsAndTypes() const { return DoneWritingDeclsAndTypes; }
 
+  bool isDeclPredefined(const Decl *D) const {
+    return PredefinedDecls.count(D);
+  }
+
   void handleVTable(CXXRecordDecl *RD);
 
 private:

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index e00bacfd940b9..e6a58dcc61e3f 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -3728,6 +3728,29 @@ static NamedDecl *getDeclForLocalLookup(const 
LangOptions &LangOpts,
 
 namespace {
 
+bool IsInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset,
+                             bool IsModule, bool IsCPlusPlus) {
+  bool NeedDecls = !IsModule || !IsCPlusPlus;
+
+  bool IsInteresting =
+      II->getNotableIdentifierID() != tok::NotableIdentifierKind::not_notable 
||
+      II->getBuiltinID() != Builtin::ID::NotBuiltin ||
+      II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword;
+  if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) ||
+      II->hasRevertedTokenIDToIdentifier() ||
+      (NeedDecls && II->getFETokenInfo()))
+    return true;
+
+  return false;
+}
+
+bool IsInterestingNonMacroIdentifier(const IdentifierInfo *II,
+                                     ASTWriter &Writer) {
+  bool IsModule = Writer.isWritingModule();
+  bool IsCPlusPlus = Writer.getLangOpts().CPlusPlus;
+  return IsInterestingIdentifier(II, /*MacroOffset=*/0, IsModule, IsCPlusPlus);
+}
+
 class ASTIdentifierTableTrait {
   ASTWriter &Writer;
   Preprocessor &PP;
@@ -3741,17 +3764,8 @@ class ASTIdentifierTableTrait {
   /// doesn't check whether the name has macros defined; use 
PublicMacroIterator
   /// to check that.
   bool isInterestingIdentifier(const IdentifierInfo *II, uint64_t MacroOffset) 
{
-    bool IsInteresting =
-        II->getNotableIdentifierID() !=
-            tok::NotableIdentifierKind::not_notable ||
-        II->getBuiltinID() != Builtin::ID::NotBuiltin ||
-        II->getObjCKeywordID() != tok::ObjCKeywordKind::objc_not_keyword;
-    if (MacroOffset || II->isPoisoned() || (!IsModule && IsInteresting) ||
-        II->hasRevertedTokenIDToIdentifier() ||
-        (NeedDecls && II->getFETokenInfo()))
-      return true;
-
-    return false;
+    return IsInterestingIdentifier(II, MacroOffset, IsModule,
+                                   Writer.getLangOpts().CPlusPlus);
   }
 
 public:
@@ -3782,10 +3796,6 @@ class ASTIdentifierTableTrait {
     return isInterestingIdentifier(II, MacroOffset);
   }
 
-  bool isInterestingNonMacroIdentifier(const IdentifierInfo *II) {
-    return isInterestingIdentifier(II, 0);
-  }
-
   std::pair<unsigned, unsigned>
   EmitKeyDataLength(raw_ostream &Out, const IdentifierInfo *II, IdentifierID 
ID) {
     // Record the location of the identifier data. This is used when generating
@@ -3887,21 +3897,6 @@ void ASTWriter::WriteIdentifierTable(Preprocessor &PP,
     ASTIdentifierTableTrait Trait(*this, PP, IdResolver, IsModule,
                                   IsModule ? &InterestingIdents : nullptr);
 
-    // Look for any identifiers that were named while processing the
-    // headers, but are otherwise not needed. We add these to the hash
-    // table to enable checking of the predefines buffer in the case
-    // where the user adds new macro definitions when building the AST
-    // file.
-    SmallVector<const IdentifierInfo *, 128> IIs;
-    for (const auto &ID : PP.getIdentifierTable())
-      if (Trait.isInterestingNonMacroIdentifier(ID.second))
-        IIs.push_back(ID.second);
-    // Sort the identifiers lexicographically before getting the references so
-    // that their order is stable.
-    llvm::sort(IIs, llvm::deref<std::less<>>());
-    for (const IdentifierInfo *II : IIs)
-      getIdentifierRef(II);
-
     // Create the on-disk hash table representation. We only store offsets
     // for identifiers that appear here for the first time.
     IdentifierOffsets.resize(NextIdentID - FirstIdentID);
@@ -4125,16 +4120,24 @@ bool ASTWriter::isLookupResultExternal(StoredDeclsList 
&Result,
          DC->hasNeedToReconcileExternalVisibleStorage();
 }
 
-bool ASTWriter::isLookupResultEntirelyExternalOrUnreachable(
-    StoredDeclsList &Result, DeclContext *DC) {
+/// Returns ture if all of the lookup result are either external, not emitted 
or
+/// predefined. In such cases, the lookup result is not interesting and we 
don't
+/// need to record the result in the current being written module. Return false
+/// otherwise.
+static bool isLookupResultNotInteresting(ASTWriter &Writer,
+                                         StoredDeclsList &Result) {
   for (auto *D : Result.getLookupResult()) {
-    auto *LocalD = getDeclForLocalLookup(getLangOpts(), D);
+    auto *LocalD = getDeclForLocalLookup(Writer.getLangOpts(), D);
     if (LocalD->isFromASTFile())
       continue;
 
     // We can only be sure whether the local declaration is reachable
     // after we done writing the declarations and types.
-    if (DoneWritingDeclsAndTypes && !wasDeclEmitted(LocalD))
+    if (Writer.getDoneWritingDeclsAndTypes() && !Writer.wasDeclEmitted(LocalD))
+      continue;
+
+    // We don't need to emit the predefined decls.
+    if (Writer.isDeclPredefined(LocalD))
       continue;
 
     return false;
@@ -4182,11 +4185,11 @@ ASTWriter::GenerateNameLookupTable(const DeclContext 
*ConstDC,
     // that entirely external or unreachable.
     //
     // FIMXE: It looks sufficient to test
-    // isLookupResultEntirelyExternalOrUnreachable here. But due to bug we have
+    // isLookupResultNotInteresting here. But due to bug we have
     // to test isLookupResultExternal here. See
     // https://github.com/llvm/llvm-project/issues/61065 for details.
     if ((GeneratingReducedBMI || isLookupResultExternal(Result, DC)) &&
-        isLookupResultEntirelyExternalOrUnreachable(Result, DC))
+        isLookupResultNotInteresting(*this, Result))
       continue;
 
     // We also skip empty results. If any of the results could be external and
@@ -5007,6 +5010,7 @@ void ASTWriter::PrepareWritingSpecialDecls(Sema &SemaRef) 
{
     if (D) {
       assert(D->isCanonicalDecl() && "predefined decl is not canonical");
       DeclIDs[D] = ID;
+      PredefinedDecls.insert(D);
     }
   };
   RegisterPredefDecl(Context.getTranslationUnitDecl(),
@@ -5355,6 +5359,24 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, 
StringRef isysroot,
 
   writeUnhashedControlBlock(PP, Context);
 
+  // Look for any identifiers that were named while processing the
+  // headers, but are otherwise not needed. We add these to the hash
+  // table to enable checking of the predefines buffer in the case
+  // where the user adds new macro definitions when building the AST
+  // file.
+  //
+  // We do this before emitting any Decl and Types to make sure the
+  // Identifier ID is stable.
+  SmallVector<const IdentifierInfo *, 128> IIs;
+  for (const auto &ID : PP.getIdentifierTable())
+    if (IsInterestingNonMacroIdentifier(ID.second, *this))
+      IIs.push_back(ID.second);
+  // Sort the identifiers lexicographically before getting the references so
+  // that their order is stable.
+  llvm::sort(IIs, llvm::deref<std::less<>>());
+  for (const IdentifierInfo *II : IIs)
+    getIdentifierRef(II);
+
   // Write the set of weak, undeclared identifiers. We always write the
   // entire table, since later PCH files in a PCH chain are only interested in
   // the results at the end of the chain.

diff  --git a/clang/test/Modules/decl-params-determinisim.m 
b/clang/test/Modules/decl-params-determinisim.m
index 9cf37ac4334cf..db4ed33265388 100644
--- a/clang/test/Modules/decl-params-determinisim.m
+++ b/clang/test/Modules/decl-params-determinisim.m
@@ -28,23 +28,23 @@
 
 // CHECK: <TYPE_FUNCTION_PROTO
 // CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=2
+// CHECK-SAME: op5=13
 // CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=3
+// CHECK-SAME: op5=14
 // CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=4
+// CHECK-SAME: op5=15
 // CHECK-NEXT: <DECL_PARM_VAR
-// CHECK-SAME: op5=5
+// CHECK-SAME: op5=16
 
 /// Decl records start at 43
 // CHECK: <DECL_RECORD
-// CHECK-SAME: op5=43
+// CHECK-SAME: op5=54
 // CHECK-NEXT: <DECL_RECORD
-// CHECK-SAME: op5=44
+// CHECK-SAME: op5=55
 // CHECK-NEXT: <DECL_RECORD
-// CHECK-SAME: op5=45
+// CHECK-SAME: op5=56
 // CHECK-NEXT: <DECL_RECORD
-// CHECK-SAME: op5=46
+// CHECK-SAME: op5=57
 
 //--- headers/a.h
 void f(struct A0 *a0,

diff  --git a/clang/test/Modules/no-transitive-identifier-change-2.cppm 
b/clang/test/Modules/no-transitive-identifier-change-2.cppm
new file mode 100644
index 0000000000000..50a54c69cf152
--- /dev/null
+++ b/clang/test/Modules/no-transitive-identifier-change-2.cppm
@@ -0,0 +1,28 @@
+// Test that adding a new identifier within reduced BMI may not produce a 
transitive change.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.cppm -o 
%t/A.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o 
%t/B.pcm \
+// RUN:     -fmodule-file=A=%t/A.pcm
+//
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/A.v1.cppm -o 
%t/A.v1.pcm
+// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %t/B.cppm -o 
%t/B.v1.pcm \
+// RUN:     -fmodule-file=A=%t/A.v1.pcm
+//
+// RUN: 
diff  %t/B.pcm %t/B.v1.pcm &> /dev/null
+
+//--- A.cppm
+export module A;
+export int a();
+
+//--- A.v1.cppm
+export module A;
+export int a();
+export int a2();
+
+//--- B.cppm
+export module B;
+import A;
+export int b() { return a(); }


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to