ChuanqiXu created this revision.
ChuanqiXu added reviewers: ilya-biryukov, rsmith, dblaikie, iains, erichkeane, 
tahonermann.
ChuanqiXu added a project: clang-modules.
Herald added a project: All.
ChuanqiXu requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Close https://github.com/llvm/llvm-project/issues/57222

The previous deserialization code wouldn't try to merge lambdas since every 
lambda has a unique type before C++20. In C++20 and later, lambdas can have the 
same type under certain condition. See the issue link or the comment for 
example.

Due to we don't care about merging unnamed entities before, now it is pretty 
problematic to fix it in a well formed.

This is a workaround for the problem but the cost is the inefficiency.  And if 
we want to fix the problem properly, it looks like we need to touch something 
very fundamentally and it may affect a lot of things beyond the C++20 modules 
or even modules.

See my inline comments for details.

The bug/defect has a strong impact. I feel like fixing bug is more important so 
I prefer to land this one first and file an issue for this to record it.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D140002

Files:
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/test/Modules/MergeLambdas.cppm
  clang/test/Modules/MergeLambdas2.cppm
  clang/test/Modules/MergeLambdas3.cppm

Index: clang/test/Modules/MergeLambdas3.cppm
===================================================================
--- /dev/null
+++ clang/test/Modules/MergeLambdas3.cppm
@@ -0,0 +1,86 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \
+// RUN:     -fsyntax-only -verify
+//
+// RUN: %clang_cc1 -std=c++20 %t/C.cppm -emit-module-interface -o %t/C.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use2.cpp \
+// RUN:     -fsyntax-only -verify
+//
+// FIXME: We need to handle the Use3 case in Sema part.
+// RUNX: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use3.cpp \
+// RUNX:     -fsyntax-only -verify
+
+//--- lambda.h
+auto cmp = [](auto l, auto r) {
+  return l < r;
+};
+auto cmp2 = [](auto l, auto r) {
+  return l < r;
+};
+
+//--- A.cppm
+module;
+#include "lambda.h"
+export module A;
+export auto a1 = [](auto l, auto r) {
+  return l < r;
+};
+
+export auto a2 = [](auto l, auto r) {
+  return l < r;
+};
+// export using ::cmp;
+export using ::cmp2;
+
+//--- B.cppm
+module;
+#include "lambda.h"
+export module B;
+export auto b1 = [](auto l, auto r) {
+  return l < r;
+};
+export auto b2 = [](auto l, auto r) {
+  return l < r;
+};
+// export using ::cmp;
+export using ::cmp2;
+
+//--- Use.cpp
+// expected-no-diagnostics
+import A;
+import B;
+
+static_assert(!__is_same(decltype(a1), decltype(b1)));
+// static_assert(!__is_same(decltype(a1), decltype(cmp)));
+// static_assert(!__is_same(decltype(b1), decltype(cmp)));
+static_assert(!__is_same(decltype(a1), decltype(a2)));
+static_assert(!__is_same(decltype(b1), decltype(b2)));
+// static_assert(!__is_same(decltype(cmp), decltype(cmp2)));
+
+//--- C.cppm
+module;
+#include "lambda.h"
+export module C;
+export auto c = cmp;
+
+//--- Use2.cpp
+// expected-no-diagnostics
+#include "lambda.h"
+import C;
+static_assert(__is_same(decltype(c), decltype(cmp)));
+
+//--- Use3.cpp
+// FIXME: This is not handled now. We need to handle the case in
+// Sema part.
+// expected-no-diagnostics
+// import C;
+// #include "lambda.h"
+// static_assert(__is_same(decltype(c), decltype(cmp)));
+// auto use() {
+//     return cmp(4, 6); // cmp shouldn't ambiguous.
+// }
Index: clang/test/Modules/MergeLambdas2.cppm
===================================================================
--- /dev/null
+++ clang/test/Modules/MergeLambdas2.cppm
@@ -0,0 +1,44 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \
+// RUN:     -fsyntax-only -verify
+
+//--- lambda_A.h
+auto cmp = [](auto l, auto r) {
+  return l < r;
+};
+
+//--- lambda_B.h
+auto cmp = [](auto l, auto r) {
+  return l > r;
+};
+
+//--- A.cppm
+module;
+#include "lambda_A.h"
+
+export module A;
+export auto c1 = cmp;
+export using ::cmp;
+
+//--- B.cppm
+module;
+#include "lambda_B.h"
+
+export module B;
+export auto c2 = cmp;
+export using ::cmp;
+
+//--- Use.cpp
+import A;
+import B;
+
+static_assert(!__is_same(decltype(c1), decltype(c2)));
+auto use() {
+    return cmp(4, 6); // expected-error {{reference to 'cmp' is ambiguous}}
+                      // expected-note@* 1+{{candidate found by name lookup is 'cmp'}}
+}
Index: clang/test/Modules/MergeLambdas.cppm
===================================================================
--- /dev/null
+++ clang/test/Modules/MergeLambdas.cppm
@@ -0,0 +1,39 @@
+// RUN: rm -rf %t
+// RUN: mkdir -p %t
+// RUN: split-file %s %t
+//
+// RUN: %clang_cc1 -std=c++20 %t/A.cppm -emit-module-interface -o %t/A.pcm
+// RUN: %clang_cc1 -std=c++20 %t/B.cppm -emit-module-interface -o %t/B.pcm
+// RUN: %clang_cc1 -std=c++20 -fprebuilt-module-path=%t %t/Use.cpp \
+// RUN:     -fsyntax-only -verify
+
+//--- lambda.h
+auto cmp = [](auto l, auto r) {
+  return l < r;
+};
+
+//--- A.cppm
+module;
+#include "lambda.h"
+
+export module A;
+export auto c1 = cmp;
+export using ::cmp;
+
+//--- B.cppm
+module;
+#include "lambda.h"
+
+export module B;
+export auto c2 = cmp;
+export using ::cmp;
+
+//--- Use.cpp
+// expected-no-diagnostics
+import A;
+import B;
+
+static_assert(__is_same(decltype(c1), decltype(c2)));
+auto use() {
+    return cmp(4, 6); // cmp shouldn't ambiguous.
+}
Index: clang/lib/Serialization/ASTReaderDecl.cpp
===================================================================
--- clang/lib/Serialization/ASTReaderDecl.cpp
+++ clang/lib/Serialization/ASTReaderDecl.cpp
@@ -245,6 +245,7 @@
     static DeclContext *getPrimaryContextForMerging(ASTReader &Reader,
                                                     DeclContext *DC);
     FindExistingResult findExisting(NamedDecl *D);
+    FindExistingResult findExistingLambda(CXXRecordDecl *D);
 
   public:
     ASTDeclReader(ASTReader &Reader, ASTRecordReader &Record,
@@ -418,6 +419,8 @@
     template<typename T>
     RedeclarableResult VisitRedeclarable(Redeclarable<T> *D);
 
+    void mergeLambda(CXXRecordDecl *D, RedeclarableResult &Redecl);
+
     template <typename T>
     void mergeRedeclarable(Redeclarable<T> *D, RedeclarableResult &Redecl);
 
@@ -2086,6 +2089,139 @@
     Reader.PendingDefinitions.insert(D);
 }
 
+/// In C++20, lambdas can have the same type conditionally.
+///
+/// [basic.def.odr]p14:
+///   For any definable item D with definitions in multiple translation units,
+///   - if the definitions in different translation units do not satisfy the
+///   following requirements,
+///     the program is ill-formed; ...
+///   - Each such definition shall not be attached to a named module.
+///   - Each such definition shall consist of the same sequence of tokens, where
+///   the definition of a closure type is considered to consist of the sequence
+///   of tokens of the corresponding lambda-expression.
+///   - In each such definition, except within the default arguments and default
+///   template arguments of D, corresponding lambda-expressions shall have the
+///   same closure type (see below).
+///   ...
+///
+/// This function checks if the two lambdas:
+/// - Lives in the same declaration context.
+/// - Have the same definition. We check this by ODR hash code.
+/// - If either of them is attached to a named module.
+/// - If they lives in the same TU.
+static bool IsMergeableLambda(ASTContext &C, CXXRecordDecl *X,
+                              CXXRecordDecl *Y) {
+  assert(X->isLambda() && Y->isLambda());
+  // ASTContext::isSameEntity will check the DeclContext while
+  // CXXRecordDecl::getODRHash will only calculate the hash value based on the
+  // declaration itself. So the check here is not redundant.
+  if (!C.isSameEntity(X, Y))
+    return false;
+
+  if (X->getODRHash() != Y->getODRHash())
+    return false;
+
+  // Check X and Y comes from different TU and neither of them comes from module
+  // purview.
+  Module *XM = X->getOwningModule();
+  Module *YM = Y->getOwningModule();
+
+  // In this context, when we talk about modules, we only talk about named
+  // modules. So ignore header like modules here.
+  if (XM && XM->isHeaderLikeModule())
+    XM = nullptr;
+  if (YM && YM->isHeaderLikeModule())
+    YM = nullptr;
+
+  // If either of them comes from module purview, we can't treat these lambdas
+  // as the same type.
+  if ((XM && XM->isModulePurview()) || (YM && YM->isModulePurview()))
+    return false;
+
+  // The left module should be global module only.
+  assert(!XM || XM->isGlobalModule());
+  assert(!YM || YM->isGlobalModule());
+
+  // We can treat them as the same type if they are not in the same TU.
+  return XM != YM;
+}
+
+ASTDeclReader::FindExistingResult
+ASTDeclReader::findExistingLambda(CXXRecordDecl *D) {
+  assert(D && D->isLambda());
+  assert(!TypedefNameForLinkage);
+
+  DeclContext *DC = D->getDeclContext()->getRedeclContext();
+  DeclContext *MergeDC = getPrimaryContextForMerging(Reader, DC);
+  // Not in a mergeable context.
+  if (!MergeDC)
+    return FindExistingResult(Reader);
+
+  /// FIXME: The `DeclContext::noload_lookup()` wouldn't load local lexical
+  /// lookups since unnamed declarations are skipped. We can find this in
+  /// `DeclContext::buildLookupImpl` and `shouldBeHidden(NamedDecl*)` in
+  /// DeclBase.cpp. So the `DeclContext::noload_lookup()` here can only find
+  /// decls during the deserilizations. For example:
+  ///
+  /// ```
+  /// import A; // contains a lambda definition in GMF.
+  /// import B; // contains a same lambda in GMF with the above one.
+  /// ```
+  ///
+  /// Assume we've read A and we are reading the lambda definition of B.
+  /// The lambda definition in A can be found by
+  /// `DeclContext::noload_lookup()` since we've already load the lambda
+  /// in A into the lookup cache of the corresponding declaration
+  /// context. We can find this one in `ASTDeclReader::VisitDecl`.
+  ///
+  /// However, it doesn't work for the following case:
+  ///
+  /// ```
+  /// auto l = [](){};
+  /// import B; // contains a same lambda in GMF with the above one.
+  /// ```
+  ///
+  /// Now the reader don't load the lambda into the lookup cache so the
+  /// reader don't know about it. So that we can't find the lambda
+  /// definition in the current TU now. So we misses the oppptunity to
+  /// merge the lambda definition.
+  /// To merge the lambda definition in the current TU, we have to iterate
+  /// the declaration in the current TU manually, which is not so efficiency.
+  /// If we can make `DeclContext::noload_lookup()` support unnamed untities
+  /// someday, we can remove `DeclContext::noload_decls()` here simply.
+  llvm::DenseSet<Decl *> PossibleRedecls;
+  for (Decl *NL : MergeDC->noload_lookup(D->getDeclName()))
+    PossibleRedecls.insert(NL);
+
+  for (Decl *ND : MergeDC->noload_decls())
+    PossibleRedecls.insert(ND);
+
+  for (Decl *Candidate : PossibleRedecls)
+    if (CXXRecordDecl *Existing = dyn_cast<CXXRecordDecl>(Candidate))
+      if (Existing->isLambda() &&
+          IsMergeableLambda(Reader.getContext(), Existing, D))
+        return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
+                                  TypedefNameForLinkage);
+
+  return FindExistingResult(Reader, D, /*Existing=*/nullptr,
+                            AnonymousDeclNumber, TypedefNameForLinkage);
+}
+
+void ASTDeclReader::mergeLambda(CXXRecordDecl *D, RedeclarableResult &Redecl) {
+  // If C++20 modules are not available, there is no reason to perform this
+  // merge.
+  if (!Reader.getContext().getLangOpts().CPlusPlusModules)
+    return;
+
+  // If we're not the canonical declaration, we don't need to merge.
+  if (!D->isFirstDecl())
+    return;
+
+  if (TagDecl *Existing = findExistingLambda(D))
+    mergeRedeclarable(D, Existing, Redecl);
+}
+
 ASTDeclReader::RedeclarableResult
 ASTDeclReader::VisitCXXRecordDeclImpl(CXXRecordDecl *D) {
   RedeclarableResult Redecl = VisitRecordDeclImpl(D);
@@ -2146,6 +2282,15 @@
       C.KeyFunctions[D] = KeyFn;
   }
 
+  if (D->isLambda()) {
+    /// Lambdas shouldn't be templates.
+    /// Note that the record type of the generic lambdas are not templates.
+    /// The generic thing is the operator functions.
+    assert(!D->getDescribedClassTemplate() &&
+           !D->getMemberSpecializationInfo());
+    mergeLambda(D, Redecl);
+  }
+
   return Redecl;
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to