Author: Chuanqi Xu
Date: 2024-05-07T11:41:08+08:00
New Revision: dfa7ff97b24dc5a3dd714b45af288812c13d0110

URL: 
https://github.com/llvm/llvm-project/commit/dfa7ff97b24dc5a3dd714b45af288812c13d0110
DIFF: 
https://github.com/llvm/llvm-project/commit/dfa7ff97b24dc5a3dd714b45af288812c13d0110.diff

LOG: [C++20] [Modules] [Reduced BMI] Combine the signature of used modules
into the current module

Following of https://github.com/llvm/llvm-project/pull/86912. After
https://github.com/llvm/llvm-project/pull/86912, with reduced BMI, the
BMI can keep unchange if the dependent modules only changes the
implementation (without introduing new decls). However, this is not
strictly correct.

For example:

```
// a.cppm
export module a;
export inline int a() { ... }

// b.cppm
export module b;
import a;
export inline int b() { return a(); }
```

Since both `a()` and `b()` are inline, we need to make sure the BMI of
`b.pcm` will change after the implementation of `a()` changes.

We can't get that naturally since we won't record the body of `a()`
during the writing process. We can't reuse ODRHash here since ODRHash
won't calculate the called function recursively. So ODRHash will be
problematic if `a()` calls other inline functions.

Probably we can solve this by a new hash mechanism. But the safety and
efficiency may a problem too. Here we just combine the hash value of the
used modules conservatively.

Added: 
    clang/test/Modules/function-transitive-change.cppm

Modified: 
    clang/include/clang/Serialization/ASTWriter.h
    clang/lib/Serialization/ASTWriter.cpp
    clang/test/Modules/no-transitive-source-location-change.cppm

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Serialization/ASTWriter.h 
b/clang/include/clang/Serialization/ASTWriter.h
index 6847c1db39c8ac..482e9dd168cc3d 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -357,6 +357,13 @@ class ASTWriter : public ASTDeserializationListener,
   /// contexts.
   llvm::DenseMap<const Decl *, unsigned> AnonymousDeclarationNumbers;
 
+  /// The external top level module during the writing process. Used to
+  /// generate signature for the module file being written.
+  ///
+  /// Only meaningful for standard C++ named modules. See the comments in
+  /// createSignatureForNamedModule() for details.
+  llvm::DenseSet<Module *> TouchedTopLevelModules;
+
   /// An update to a Decl.
   class DeclUpdate {
     /// A DeclUpdateKind.

diff  --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 8a0116fa893247..42da50abdc687c 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1200,6 +1200,31 @@ ASTFileSignature 
ASTWriter::createSignatureForNamedModule() const {
   for (auto [ExportImported, _] : WritingModule->Exports)
     Hasher.update(ExportImported->Signature);
 
+  // We combine all the used modules to make sure the signature is precise.
+  // Consider the case like:
+  //
+  // // a.cppm
+  // export module a;
+  // export inline int a() { ... }
+  //
+  // // b.cppm
+  // export module b;
+  // import a;
+  // export inline int b() { return a(); }
+  //
+  // Since both `a()` and `b()` are inline, we need to make sure the BMI of
+  // `b.pcm` will change after the implementation of `a()` changes. We can't
+  // get that naturally since we won't record the body of `a()` during the
+  // writing process. We can't reuse ODRHash here since ODRHash won't calculate
+  // the called function recursively. So ODRHash will be problematic if `a()`
+  // calls other inline functions.
+  //
+  // Probably we can solve this by a new hash mechanism. But the safety and
+  // efficiency may a problem too. Here we just combine the hash value of the
+  // used modules conservatively.
+  for (Module *M : TouchedTopLevelModules)
+    Hasher.update(M->Signature);
+
   return ASTFileSignature::create(Hasher.result());
 }
 
@@ -6112,8 +6137,12 @@ LocalDeclID ASTWriter::GetDeclRef(const Decl *D) {
 
   // If D comes from an AST file, its declaration ID is already known and
   // fixed.
-  if (D->isFromASTFile())
+  if (D->isFromASTFile()) {
+    if (isWritingStdCXXNamedModules() && D->getOwningModule())
+      TouchedTopLevelModules.insert(D->getOwningModule()->getTopLevelModule());
+
     return LocalDeclID(D->getGlobalID());
+  }
 
   assert(!(reinterpret_cast<uintptr_t>(D) & 0x01) && "Invalid decl pointer");
   LocalDeclID &ID = DeclIDs[D];

diff  --git a/clang/test/Modules/function-transitive-change.cppm 
b/clang/test/Modules/function-transitive-change.cppm
new file mode 100644
index 00000000000000..cfce669e3a7bc2
--- /dev/null
+++ b/clang/test/Modules/function-transitive-change.cppm
@@ -0,0 +1,94 @@
+// Test that, in C++20 modules reduced BMI, the implementation detail changes
+// in non-inline function may not propagate while the inline function changes
+// can get propagate.
+//
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: cd %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-reduced-module-interface -o 
%t/a.pcm
+// RUN: %clang_cc1 -std=c++20 %t/a.v1.cppm -emit-reduced-module-interface -o 
%t/a.v1.pcm
+//
+// The BMI of A should 
diff er since the 
diff erent implementation.
+// RUN: not 
diff  %t/a.pcm %t/a.v1.pcm &> /dev/null
+//
+// The BMI of B should change since the dependent inline function changes
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface 
-fmodule-file=a=%t/a.pcm \
+// RUN:     -o %t/b.pcm
+// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-reduced-module-interface 
-fmodule-file=a=%t/a.v1.pcm \
+// RUN:     -o %t/b.v1.pcm
+// RUN: not 
diff  %t/b.v1.pcm %t/b.pcm  &> /dev/null
+//
+// Test the case with unused partitions.
+// RUN: %clang_cc1 -std=c++20 %t/M-A.cppm -emit-reduced-module-interface -o 
%t/M-A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/M-B.cppm -emit-reduced-module-interface -o 
%t/M-B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-reduced-module-interface -o 
%t/M.pcm \
+// RUN:     -fmodule-file=M:partA=%t/M-A.pcm \
+// RUN:     -fmodule-file=M:partB=%t/M-B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/N.cppm -emit-reduced-module-interface -o 
%t/N.pcm \
+// RUN:     -fmodule-file=M:partA=%t/M-A.pcm \
+// RUN:     -fmodule-file=M:partB=%t/M-B.pcm \
+// RUN:     -fmodule-file=M=%t/M.pcm
+//
+// Now we change `M-A.cppm` to `M-A.v1.cppm`.
+// RUN: %clang_cc1 -std=c++20 %t/M-A.v1.cppm -emit-reduced-module-interface -o 
%t/M-A.v1.pcm
+// RUN: %clang_cc1 -std=c++20 %t/M.cppm -emit-reduced-module-interface -o 
%t/M.v1.pcm \
+// RUN:     -fmodule-file=M:partA=%t/M-A.v1.pcm \
+// RUN:     -fmodule-file=M:partB=%t/M-B.pcm
+// RUN: %clang_cc1 -std=c++20 %t/N.cppm -emit-reduced-module-interface -o 
%t/N.v1.pcm \
+// RUN:     -fmodule-file=M:partA=%t/M-A.v1.pcm \
+// RUN:     -fmodule-file=M:partB=%t/M-B.pcm \
+// RUN:     -fmodule-file=M=%t/M.v1.pcm
+//
+// The BMI of N can keep unchanged since the N didn't use the changed 
partition unit 'M:A'.
+// RUN: 
diff  %t/N.v1.pcm %t/N.pcm  &> /dev/null
+
+//--- a.cppm
+export module a;
+export inline int a() {
+    return 48;
+}
+
+//--- a.v1.cppm
+export module a;
+export inline int a() {
+    return 50;
+}
+
+//--- b.cppm
+export module b;
+import a;
+export inline int b() {
+    return a();
+}
+
+//--- M-A.cppm
+export module M:partA;
+export inline int a() {
+    return 43;
+}
+
+//--- M-A.v1.cppm
+export module M:partA;
+export inline int a() {
+    return 50;
+}
+
+//--- M-B.cppm
+export module M:partB;
+export inline int b() {
+    return 44;
+}
+
+//--- M.cppm
+export module M;
+export import :partA;
+export import :partB;
+
+//--- N.cppm
+export module N;
+import M;
+
+export inline int n() {
+    return b();
+}

diff  --git a/clang/test/Modules/no-transitive-source-location-change.cppm 
b/clang/test/Modules/no-transitive-source-location-change.cppm
index 303142a1af890b..c9d156a74ce822 100644
--- a/clang/test/Modules/no-transitive-source-location-change.cppm
+++ b/clang/test/Modules/no-transitive-source-location-change.cppm
@@ -1,30 +1,6 @@
 // Testing that adding a new line in a module interface unit won't cause the 
BMI
 // of consuming module unit changes.
 //
-// RUN: rm -rf %t
-// RUN: split-file %s %t
-//
-// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
-// RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-module-interface -o 
%t/A.v1.pcm
-//
-// The BMI may not be the same since the source location 
diff ers.
-// RUN: not 
diff  %t/A.pcm %t/A.v1.pcm &> /dev/null
-//
-// The BMI of B shouldn't change since all the locations remain the same.
-// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface 
-fmodule-file=A=%t/A.pcm \
-// RUN:     -o %t/B.pcm
-// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface 
-fmodule-file=A=%t/A.v1.pcm \
-// RUN:     -o %t/B.v1.pcm
-// RUN: 
diff  %t/B.v1.pcm %t/B.pcm  &> /dev/null
-//
-// The BMI of C may change since the locations for instantiations changes.
-// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface 
-fmodule-file=A=%t/A.pcm \
-// RUN:     -o %t/C.pcm
-// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface 
-fmodule-file=A=%t/A.v1.pcm \
-// RUN:     -o %t/C.v1.pcm
-// RUN: not 
diff  %t/C.v1.pcm %t/C.pcm  &> /dev/null
-//
-// Test again with reduced BMI.
 // RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-reduced-module-interface -o 
%t/A.pcm
 // RUN: %clang_cc1 -std=c++20 %t/A.v1.cppm -emit-reduced-module-interface -o 
%t/A.v1.pcm
 //


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to