https://github.com/qiongsiwu updated 
https://github.com/llvm/llvm-project/pull/170215

>From c27d30ead152fba032870cdf53e3988e2aef102f Mon Sep 17 00:00:00 2001
From: Jan Svoboda <[email protected]>
Date: Thu, 2 Nov 2023 14:35:30 -0700
Subject: [PATCH 1/4] [clang][modules] Track included files per submodule

(cherry picked from commit 9debc58d5135fbde51967dfb076d0ec5d954df38)

 Conflicts:
        clang/include/clang/Lex/Preprocessor.h
        clang/lib/Serialization/ASTReader.cpp
        clang/lib/Serialization/ASTReaderInternals.h
        clang/lib/Serialization/ASTWriter.cpp
---
 clang/include/clang/Basic/Module.h          |  2 +
 clang/include/clang/Lex/Preprocessor.h      | 38 ++++++++++++---
 clang/lib/Serialization/ASTReader.cpp       | 33 ++++++++-----
 clang/lib/Serialization/ASTWriter.cpp       | 46 ++++++++++++++----
 clang/test/Modules/per-submodule-includes.m | 53 +++++++++++++++++++++
 5 files changed, 145 insertions(+), 27 deletions(-)
 create mode 100644 clang/test/Modules/per-submodule-includes.m

diff --git a/clang/include/clang/Basic/Module.h 
b/clang/include/clang/Basic/Module.h
index 69a1de6f79b35..02552ed221036 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -498,6 +498,8 @@ class alignas(8) Module {
   /// to import but didn't because they are not direct uses.
   llvm::SmallSetVector<const Module *, 2> UndeclaredUses;
 
+  llvm::DenseSet<const FileEntry *> Includes;
+
   /// A library or framework to link against when an entity from this
   /// module is used.
   struct LinkLibrary {
diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index b6e42a6151ac3..66a18dd3f24b6 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1058,6 +1058,8 @@ class Preprocessor {
     /// The set of modules that are visible within the submodule.
     VisibleModuleSet VisibleModules;
 
+    /// The files that have been included.
+    IncludedFilesSet IncludedFiles;
     // FIXME: CounterValue?
     // FIXME: PragmaPushMacroInfo?
   };
@@ -1070,8 +1072,8 @@ class Preprocessor {
   /// in a submodule.
   SubmoduleState *CurSubmoduleState;
 
-  /// The files that have been included.
-  IncludedFilesSet IncludedFiles;
+  /// The files that have been included outside of (sub)modules.
+  IncludedFilesSet Includes;
 
   /// The set of top-level modules that affected preprocessing, but were not
   /// imported.
@@ -1556,19 +1558,41 @@ class Preprocessor {
   /// Mark the file as included.
   /// Returns true if this is the first time the file was included.
   bool markIncluded(FileEntryRef File) {
+    bool AlreadyIncluded = alreadyIncluded(File);
     HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
-    return IncludedFiles.insert(File).second;
+    CurSubmoduleState->IncludedFiles.insert(File);
+    if (!BuildingSubmoduleStack.empty())
+      BuildingSubmoduleStack.back().M->Includes.insert(File);
+    else if (Module *M = getCurrentModule())
+      M->Includes.insert(File);
+    else
+      Includes.insert(File);
+    return !AlreadyIncluded;
   }
 
   /// Return true if this header has already been included.
   bool alreadyIncluded(FileEntryRef File) const {
     HeaderInfo.getFileInfo(File);
-    return IncludedFiles.count(File);
+    if (CurSubmoduleState->IncludedFiles.contains(File))
+      return true;
+    // TODO: Do this more efficiently.
+    for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules())
+      if (CurSubmoduleState->VisibleModules.isVisible(M))
+        if (M->Includes.contains(File))
+          return true;
+    return false;
+  }
+
+  void markIncludedOnTopLevel(const FileEntry *File) {
+    Includes.insert(File);
+    CurSubmoduleState->IncludedFiles.insert(File);
+  }
+
+  void markIncludedInModule(Module *M, const FileEntry *File) {
+    M->Includes.insert(File);
   }
 
-  /// Get the set of included files.
-  IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
-  const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
+  const IncludedFilesSet &getTopLevelIncludes() const { return Includes; }
 
   /// Return the name of the macro defined before \p Loc that has
   /// spelling \p Tokens.  If there are multiple macros with same spelling,
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index bde000234a062..b738743e92249 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2374,14 +2374,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, 
const unsigned char *d,
   HeaderFileInfo HFI;
   unsigned Flags = *d++;
 
-  OptionalFileEntryRef FE;
-  bool Included = (Flags >> 6) & 0x01;
-  if (Included)
-    if ((FE = getFile(key)))
-      // Not using \c Preprocessor::markIncluded(), since that would attempt to
-      // deserialize this header file info again.
-      Reader.getPreprocessor().getIncludedFiles().insert(*FE);
-
   // FIXME: Refactor with mergeHeaderFileInfo in HeaderSearch.cpp.
   HFI.isImport |= (Flags >> 5) & 0x01;
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
@@ -2389,6 +2381,27 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, 
const unsigned char *d,
   HFI.LazyControllingMacro = Reader.getGlobalIdentifierID(
       M, endian::readNext<IdentifierID, llvm::endianness::little>(d));
 
+  auto FE = getFile(key);
+  Preprocessor &PP = Reader.getPreprocessor();
+  ModuleMap &ModMap = PP.getHeaderSearchInfo().getModuleMap();
+
+  unsigned IncludedCount =
+      endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
+  for (unsigned I = 0; I < IncludedCount; ++I) {
+    uint32_t LocalSMID =
+        endian::readNext<uint32_t, llvm::endianness::little, unaligned>(d);
+    if (!FE)
+      continue;
+
+    if (LocalSMID == 0) {
+      PP.markIncludedOnTopLevel(*FE);
+    } else {
+      SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
+      Module *Mod = Reader.getSubmodule(GlobalSMID);
+      PP.markIncludedInModule(Mod, *FE);
+    }
+  }
+
   assert((End - d) % 4 == 0 &&
          "Wrong data length in HeaderFileInfo deserialization");
   while (d != End) {
@@ -2401,10 +2414,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, 
const unsigned char *d,
     // implicit module import.
     SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
     Module *Mod = Reader.getSubmodule(GlobalSMID);
-    ModuleMap &ModMap =
-        Reader.getPreprocessor().getHeaderSearchInfo().getModuleMap();
 
-    if (FE || (FE = getFile(key))) {
+    if (FE) {
       // FIXME: NameAsWritten
       Module::Header H = {std::string(key.Filename), "", *FE};
       ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
diff --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index af46f84d5aac0..046875b4c2145 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2098,14 +2098,15 @@ namespace {
         llvm::PointerIntPair<Module *, 2, ModuleMap::ModuleHeaderRole>;
 
     struct data_type {
-      data_type(const HeaderFileInfo &HFI, bool AlreadyIncluded,
+      data_type(const HeaderFileInfo &HFI,
+                const std::vector<const Module *> &Includers,
                 ArrayRef<ModuleMap::KnownHeader> KnownHeaders,
                 UnresolvedModule Unresolved)
-          : HFI(HFI), AlreadyIncluded(AlreadyIncluded),
-            KnownHeaders(KnownHeaders), Unresolved(Unresolved) {}
+          : HFI(HFI), Includers(Includers), KnownHeaders(KnownHeaders),
+            Unresolved(Unresolved) {}
 
       HeaderFileInfo HFI;
-      bool AlreadyIncluded;
+      std::vector<const Module *> Includers;
       SmallVector<ModuleMap::KnownHeader, 1> KnownHeaders;
       UnresolvedModule Unresolved;
     };
@@ -2128,6 +2129,12 @@ namespace {
     EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
       unsigned DataLen = 1 + sizeof(IdentifierID);
+
+      DataLen += 4;
+      for (const Module *M : Data.Includers)
+        if (!M || Writer.getLocalOrImportedSubmoduleID(M))
+          DataLen += 4;
+
       for (auto ModInfo : Data.KnownHeaders)
         if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
           DataLen += 4;
@@ -2154,8 +2161,7 @@ namespace {
       endian::Writer LE(Out, llvm::endianness::little);
       uint64_t Start = Out.tell(); (void)Start;
 
-      unsigned char Flags = (Data.AlreadyIncluded << 6)
-                          | (Data.HFI.isImport << 5)
+      unsigned char Flags = (Data.HFI.isImport << 5)
                           | (Writer.isWritingStdCXXNamedModules() ? 0 :
                              Data.HFI.isPragmaOnce << 4)
                           | (Data.HFI.DirInfo << 1);
@@ -2167,6 +2173,14 @@ namespace {
         LE.write<IdentifierID>(
             Writer.getIdentifierRef(Data.HFI.LazyControllingMacro.getPtr()));
 
+      LE.write<uint32_t>(Data.Includers.size());
+      for (const Module *M : Data.Includers) {
+        if (!M)
+          LE.write<uint32_t>(0);
+        else if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M))
+          LE.write<uint32_t>(ModID);
+      }
+
       auto EmitModule = [&](Module *M, ModuleMap::ModuleHeaderRole Role) {
         if (uint32_t ModID = Writer.getLocalOrImportedSubmoduleID(M)) {
           uint32_t Value = (ModID << 3) | (unsigned)Role;
@@ -2238,7 +2252,7 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch &HS) 
{
         HeaderFileInfoTrait::key_type Key = {
             FilenameDup, *U.Size, IncludeTimestamps ? *U.ModTime : 0};
         HeaderFileInfoTrait::data_type Data = {
-            Empty, false, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
+            Empty, {}, {}, {M, ModuleMap::headerKindToRole(U.Kind)}};
         // FIXME: Deal with cases where there are multiple unresolved header
         // directives in different submodules for the same header.
         Generator.insert(Key, Data, GeneratorTrait);
@@ -2278,13 +2292,27 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch 
&HS) {
       SavedStrings.push_back(Filename.data());
     }
 
-    bool Included = HFI->IsLocallyIncluded || PP->alreadyIncluded(*File);
+    std::vector<const Module *> Includers;
+    if (WritingModule) {
+      llvm::DenseSet<const Module *> Seen;
+      std::function<void(const Module *)> Visit = [&](const Module *M) {
+        if (!Seen.insert(M).second)
+          return;
+        if (M->Includes.contains(*File))
+          Includers.push_back(M);
+        for (const Module *SubM : M->submodules())
+          Visit(SubM);
+      };
+      Visit(WritingModule);
+    } else if (PP->getTopLevelIncludes().contains(*File)) {
+      Includers.push_back(nullptr);
+    }
 
     HeaderFileInfoTrait::key_type Key = {
       Filename, File->getSize(), getTimestampForOutput(*File)
     };
     HeaderFileInfoTrait::data_type Data = {
-      *HFI, Included, HS.getModuleMap().findResolvedModulesForHeader(*File), {}
+      *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), 
{}
     };
     Generator.insert(Key, Data, GeneratorTrait);
     ++NumHeaderSearchEntries;
diff --git a/clang/test/Modules/per-submodule-includes.m 
b/clang/test/Modules/per-submodule-includes.m
new file mode 100644
index 0000000000000..39fd255e8e93e
--- /dev/null
+++ b/clang/test/Modules/per-submodule-includes.m
@@ -0,0 +1,53 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- frameworks/Textual.framework/Headers/Header.h
+static int symbol;
+
+//--- frameworks/FW.framework/Modules/module.modulemap
+framework module FW {
+  umbrella header "FW.h"
+  export *
+  module * { export * }
+}
+//--- frameworks/FW.framework/Headers/FW.h
+#import <FW/Sub1.h>
+#import <FW/Sub2.h>
+//--- frameworks/FW.framework/Headers/Sub1.h
+//--- frameworks/FW.framework/Headers/Sub2.h
+#import <Textual/Header.h>
+
+//--- pch.modulemap
+module __PCH {
+  header "pch.h"
+  export *
+}
+//--- pch.h
+#import <FW/Sub1.h>
+
+//--- tu.m
+#import <Textual/Header.h>
+int fn() { return symbol; }
+
+// Compilation using the PCH regularly succeeds. The import of FW/Sub1.h in the
+// PCH is treated textually due to -fmodule-name=FW.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache 
-fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
+// RUN:   -emit-pch -x objective-c %t/pch.h -o %t/pch.h.gch
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache 
-fimplicit-module-maps -F %t/frameworks -fmodule-name=FW \
+// RUN:   -include-pch %t/pch.h.gch -fsyntax-only %t/tu.m
+
+// Compilation using the PCH as precompiled module fails. The import of 
FW/Sub1.h
+// in the PCH is translated to an import. Nothing is preventing that now that
+// -fmodule-name=FW has been replaced with -fmodule-name=__PCH.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache 
-fimplicit-module-maps -F %t/frameworks \
+// RUN:   -emit-module -fmodule-name=__PCH -x objective-c %t/pch.modulemap -o 
%t/pch.h.pcm
+//
+// Loading FW.pcm marks Textual/Header.h as imported (because it is imported in
+// FW.Sub2), so the TU does not import it again. It's contents remain 
invisible,
+// though.
+//
+// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache 
-fimplicit-module-maps -F %t/frameworks \
+// RUN:   -include %t/pch.h -fmodule-map-file=%t/pch.modulemap -fsyntax-only 
%t/tu.m

>From feb55219a106d93395322f25bb7612269d3567b3 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <[email protected]>
Date: Mon, 1 Dec 2025 14:07:07 -0800
Subject: [PATCH 2/4] Move the included vector into
 HeaderFileInfoTrait::data_type and add a test case.

---
 clang/lib/Serialization/ASTWriter.cpp         | 21 +++---
 .../Modules/import-submodule-visibility.c     | 64 +++++++++++++++++++
 2 files changed, 76 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/Modules/import-submodule-visibility.c

diff --git a/clang/lib/Serialization/ASTWriter.cpp 
b/clang/lib/Serialization/ASTWriter.cpp
index 046875b4c2145..868d4da70b778 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -2099,11 +2099,11 @@ namespace {
 
     struct data_type {
       data_type(const HeaderFileInfo &HFI,
-                const std::vector<const Module *> &Includers,
+                std::vector<const Module *> &&Includers,
                 ArrayRef<ModuleMap::KnownHeader> KnownHeaders,
                 UnresolvedModule Unresolved)
-          : HFI(HFI), Includers(Includers), KnownHeaders(KnownHeaders),
-            Unresolved(Unresolved) {}
+          : HFI(HFI), Includers(std::move(Includers)),
+            KnownHeaders(KnownHeaders), Unresolved(Unresolved) {}
 
       HeaderFileInfo HFI;
       std::vector<const Module *> Includers;
@@ -2161,10 +2161,11 @@ namespace {
       endian::Writer LE(Out, llvm::endianness::little);
       uint64_t Start = Out.tell(); (void)Start;
 
-      unsigned char Flags = (Data.HFI.isImport << 5)
-                          | (Writer.isWritingStdCXXNamedModules() ? 0 :
-                             Data.HFI.isPragmaOnce << 4)
-                          | (Data.HFI.DirInfo << 1);
+      unsigned char Flags =
+          (Data.HFI.isImport << 5) |
+          (Writer.isWritingStdCXXNamedModules() ? 0
+                                                : Data.HFI.isPragmaOnce << 4) |
+          (Data.HFI.DirInfo << 1);
       LE.write<uint8_t>(Flags);
 
       if (Data.HFI.LazyControllingMacro.isID())
@@ -2312,8 +2313,10 @@ void ASTWriter::WriteHeaderSearch(const HeaderSearch 
&HS) {
       Filename, File->getSize(), getTimestampForOutput(*File)
     };
     HeaderFileInfoTrait::data_type Data = {
-      *HFI, Includers, HS.getModuleMap().findResolvedModulesForHeader(*File), 
{}
-    };
+        *HFI,
+        std::move(Includers),
+        HS.getModuleMap().findResolvedModulesForHeader(*File),
+        {}};
     Generator.insert(Key, Data, GeneratorTrait);
     ++NumHeaderSearchEntries;
   }
diff --git a/clang/test/Modules/import-submodule-visibility.c 
b/clang/test/Modules/import-submodule-visibility.c
new file mode 100644
index 0000000000000..3491494ea7cc0
--- /dev/null
+++ b/clang/test/Modules/import-submodule-visibility.c
@@ -0,0 +1,64 @@
+// This test checks that imports of headers that appeared in a different 
submodule than
+// what is imported by the current TU don't affect the compilation.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- C/C.h
+#include "Textual.h"
+//--- C/module.modulemap
+module C { header "C.h" }
+
+//--- D/D1.h
+#include "Textual.h"
+//--- D/D2.h
+//--- D/module.modulemap
+module D {
+  module D1 { header "D1.h" }
+  module D2 { header "D2.h" }
+}
+
+//--- E/E1.h
+#include "E2.h"
+//--- E/E2.h
+#include "Textual.h"
+//--- E/module.modulemap
+module E {
+  module E1 { header "E1.h" }
+  module E2 { header "E2.h" }
+}
+
+//--- Textual.h
+#define MACRO_TEXTUAL 1
+
+//--- test_top.c
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+//--- test_sub.c
+#import "D/D2.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+//--- test_transitive.c
+#import "E/E1.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// Simply loading a PCM file containing top-level module including a header 
does
+// not prevent inclusion of that header in the TU.
+//
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/C/module.modulemap 
-fmodule-name=C -o %t/C.pcm
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_top.c 
-fmodule-file=%t/C.pcm
+
+// Loading a PCM file and importing its empty submodule does not prevent
+// inclusion of headers included by invisible sibling submodules.
+//
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/D/module.modulemap 
-fmodule-name=D -o %t/D.pcm
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_sub.c 
-fmodule-file=%t/D.pcm
+
+// Loading a PCM file and importing a submodule does not prevent inclusion of
+// headers included by some of its transitive un-exported dependencies.
+//
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/E/module.modulemap 
-fmodule-name=E -o %t/E.pcm
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_transitive.c 
-fmodule-file=%t/E.pcm

>From ede8c85444c2b38d47de2966b271dcd31aed0ff8 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <[email protected]>
Date: Sat, 13 Dec 2025 14:55:36 -0800
Subject: [PATCH 3/4] Avoid looping in Preprocessor::alreadyIncluded.

---
 clang/include/clang/Lex/Preprocessor.h | 25 +++++++++++++------------
 clang/lib/Lex/Preprocessor.cpp         |  2 ++
 clang/lib/Serialization/ASTReader.cpp  |  1 +
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index 66a18dd3f24b6..739e01748efa2 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -1058,8 +1058,10 @@ class Preprocessor {
     /// The set of modules that are visible within the submodule.
     VisibleModuleSet VisibleModules;
 
-    /// The files that have been included.
+    /// The files that have been included. This set includes the headers
+    /// that have been included and visible in this submodule.
     IncludedFilesSet IncludedFiles;
+
     // FIXME: CounterValue?
     // FIXME: PragmaPushMacroInfo?
   };
@@ -1561,26 +1563,22 @@ class Preprocessor {
     bool AlreadyIncluded = alreadyIncluded(File);
     HeaderInfo.getFileInfo(File).IsLocallyIncluded = true;
     CurSubmoduleState->IncludedFiles.insert(File);
-    if (!BuildingSubmoduleStack.empty())
-      BuildingSubmoduleStack.back().M->Includes.insert(File);
-    else if (Module *M = getCurrentModule())
+
+    Module *M = BuildingSubmoduleStack.empty()
+                    ? getCurrentModule()
+                    : BuildingSubmoduleStack.back().M;
+    if (M)
       M->Includes.insert(File);
     else
       Includes.insert(File);
+
     return !AlreadyIncluded;
   }
 
   /// Return true if this header has already been included.
   bool alreadyIncluded(FileEntryRef File) const {
     HeaderInfo.getFileInfo(File);
-    if (CurSubmoduleState->IncludedFiles.contains(File))
-      return true;
-    // TODO: Do this more efficiently.
-    for (const auto &[Name, M] : HeaderInfo.getModuleMap().modules())
-      if (CurSubmoduleState->VisibleModules.isVisible(M))
-        if (M->Includes.contains(File))
-          return true;
-    return false;
+    return CurSubmoduleState->IncludedFiles.contains(File);
   }
 
   void markIncludedOnTopLevel(const FileEntry *File) {
@@ -1590,6 +1588,9 @@ class Preprocessor {
 
   void markIncludedInModule(Module *M, const FileEntry *File) {
     M->Includes.insert(File);
+
+    if (CurSubmoduleState->VisibleModules.isVisible(M))
+      CurSubmoduleState->IncludedFiles.insert(File);
   }
 
   const IncludedFilesSet &getTopLevelIncludes() const { return Includes; }
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index a531f51408dae..efefd4e41d98e 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1466,6 +1466,8 @@ void Preprocessor::makeModuleVisible(Module *M, 
SourceLocation Loc,
             << Message;
       });
 
+  CurSubmoduleState->IncludedFiles.insert_range(M->Includes);
+
   // Add this module to the imports list of the currently-built submodule.
   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
     BuildingSubmoduleStack.back().M->Imports.insert(M);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index b738743e92249..5a92f8da4b95b 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2414,6 +2414,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const 
unsigned char *d,
     // implicit module import.
     SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
     Module *Mod = Reader.getSubmodule(GlobalSMID);
+    PP.markIncludedInModule(Mod, *FE);
 
     if (FE) {
       // FIXME: NameAsWritten

>From 9d0b8af34dad3c08c709cf240f0597159ff98b78 Mon Sep 17 00:00:00 2001
From: Qiongsi Wu <[email protected]>
Date: Fri, 6 Mar 2026 14:00:10 -0800
Subject: [PATCH 4/4] Adding two tests, and removing dead code.

---
 clang/lib/Lex/Preprocessor.cpp                |   5 +-
 clang/lib/Serialization/ASTReader.cpp         |   2 -
 .../import-textual-reenter-not-visible.c      | 156 +++++++++++++++
 .../Modules/import-textual-skip-visible.c     | 186 ++++++++++++++++++
 4 files changed, 343 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/Modules/import-textual-reenter-not-visible.c
 create mode 100644 clang/test/Modules/import-textual-skip-visible.c

diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index efefd4e41d98e..b5b9fe7d307f5 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -1461,13 +1461,10 @@ void Preprocessor::makeModuleVisible(Module *M, 
SourceLocation Loc,
         // FIXME: Include the path in the diagnostic.
         // FIXME: Include the import location for the conflicting module.
         Diag(ModuleImportLoc, diag::warn_module_conflict)
-            << Path[0]->getFullModuleName()
-            << Conflict->getFullModuleName()
+            << Path[0]->getFullModuleName() << Conflict->getFullModuleName()
             << Message;
       });
 
-  CurSubmoduleState->IncludedFiles.insert_range(M->Includes);
-
   // Add this module to the imports list of the currently-built submodule.
   if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
     BuildingSubmoduleStack.back().M->Imports.insert(M);
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 5a92f8da4b95b..5e095d6c496a1 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2414,8 +2414,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const 
unsigned char *d,
     // implicit module import.
     SubmoduleID GlobalSMID = Reader.getGlobalSubmoduleID(M, LocalSMID);
     Module *Mod = Reader.getSubmodule(GlobalSMID);
-    PP.markIncludedInModule(Mod, *FE);
-
     if (FE) {
       // FIXME: NameAsWritten
       Module::Header H = {std::string(key.Filename), "", *FE};
diff --git a/clang/test/Modules/import-textual-reenter-not-visible.c 
b/clang/test/Modules/import-textual-reenter-not-visible.c
new file mode 100644
index 0000000000000..9254283fc43f2
--- /dev/null
+++ b/clang/test/Modules/import-textual-reenter-not-visible.c
@@ -0,0 +1,156 @@
+// Test that #import can re-enter a textual header when the header was included
+// by a module that is reachable but NOT visible. The key invariant:
+//
+//   If a textual header was included by module M, and M is NOT visible in the
+//   current TU, then #import of that header MUST re-enter the file (not skip 
it).
+//
+// This is the complement of import-textual-skip-visible.c, which verifies
+// the opposite: visible modules DO suppress re-entry. Here we verify that
+// non-visible modules do NOT suppress re-entry, even when the PCM containing
+// the header info has been loaded.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- Textual.h
+#define MACRO_TEXTUAL 1
+
+// =========================================================================
+// Module E: E1 includes E2, E2 includes Textual.h. NO export *.
+// =========================================================================
+//--- E/E1.h
+#include "E2.h"
+//--- E/E2.h
+#include "Textual.h"
+//--- E/module.modulemap
+module E {
+  module E1 { header "E1.h" }
+  module E2 { header "E2.h" }
+}
+
+// =========================================================================
+// Module F: F1 imports E1 (cross-module via -fmodule-file). NO export *.
+// =========================================================================
+//--- F/F1.h
+#include "E/E1.h"
+//--- F/module.modulemap
+module F {
+  module F1 { header "F1.h" }
+}
+
+// =========================================================================
+// Module G: G1 imports E1 with export *, but E1 itself does NOT export E2.
+// So importing G1 makes E1 visible (via G1's export *), but E2 is still not
+// visible because E1 has no export *.
+// =========================================================================
+//--- G/G1.h
+#include "E/E1.h"
+//--- G/module.modulemap
+module G {
+  module G1 {
+    header "G1.h"
+    export *
+  }
+}
+
+// =========================================================================
+// Module H: H1 imports E1, H2 is empty. Import H2 only — E2 is not visible.
+// =========================================================================
+//--- H/H1.h
+#include "E/E1.h"
+//--- H/H2.h
+// empty
+//--- H/module.modulemap
+module H {
+  module H1 { header "H1.h" }
+  module H2 { header "H2.h" }
+}
+
+// =========================================================================
+// Module J: another independent module that also includes Textual.h.
+// =========================================================================
+//--- J/J1.h
+#include "Textual.h"
+//--- J/module.modulemap
+module J {
+  module J1 { header "J1.h" }
+}
+
+// =========================================================================
+// Test 1: Cross-module, no export.
+// Import F1, which imports E1 (no export *). E2 included Textual.h but is
+// NOT visible. #import Textual.h must re-enter the file.
+// =========================================================================
+//--- test_cross_no_export.c
+#import "F/F1.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 2: Cross-module, partial export chain.
+// Import G1 (export *) → E1 (no export *). E1 is visible but E2 is not.
+// Textual.h was included by E2 — must be re-enterable.
+// =========================================================================
+//--- test_partial_export.c
+#import "G/G1.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 3: Import a sibling; the other sibling's transitive deps included
+// Textual.h. Import H2 only — H1 (and its dep E1, E2) are not visible.
+// Textual.h must be re-enterable.
+// =========================================================================
+//--- test_sibling_invisible.c
+#import "H/H2.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 4: Multiple PCMs loaded, none with Textual.h visible.
+// Load both E.pcm and J.pcm. Import neither E2 nor J1.
+// Textual.h was included by both E2 and J1, but neither is visible.
+// Must be re-enterable.
+// =========================================================================
+//--- test_multiple_pcms.c
+// Don't import anything — just load PCMs via -fmodule-file.
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 5: Cross-module transitive, no export at any level.
+// Import F1 → E1 → E2 → Textual.h. Neither F1 nor E1 has export *.
+// E2 is not visible. Textual.h must be re-enterable.
+// =========================================================================
+//--- test_deep_no_export.c
+#import "F/F1.h"
+#import "Textual.h"
+static int x = MACRO_TEXTUAL;
+
+// =========================================================================
+// Build PCMs.
+// =========================================================================
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/E/module.modulemap 
-fmodule-name=E -o %t/E.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/F/module.modulemap 
-fmodule-name=F -fmodule-file=%t/E.pcm -o %t/F.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/G/module.modulemap 
-fmodule-name=G -fmodule-file=%t/E.pcm -o %t/G.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/H/module.modulemap 
-fmodule-name=H -fmodule-file=%t/E.pcm -o %t/H.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/J/module.modulemap 
-fmodule-name=J -o %t/J.pcm
+
+// =========================================================================
+// Run tests.
+// =========================================================================
+
+// Test 1: cross-module, no export — Textual.h must be re-entered.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_cross_no_export.c 
-fmodule-file=%t/F.pcm
+
+// Test 2: partial export chain — Textual.h must be re-entered.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_partial_export.c 
-fmodule-file=%t/G.pcm
+
+// Test 3: sibling invisible — Textual.h must be re-entered.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_sibling_invisible.c 
-fmodule-file=%t/H.pcm
+
+// Test 4: multiple PCMs, none visible — Textual.h must be re-entered.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_multiple_pcms.c 
-fmodule-file=%t/E.pcm -fmodule-file=%t/J.pcm
+
+// Test 5: deep transitive, no export — Textual.h must be re-entered.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_deep_no_export.c 
-fmodule-file=%t/F.pcm
diff --git a/clang/test/Modules/import-textual-skip-visible.c 
b/clang/test/Modules/import-textual-skip-visible.c
new file mode 100644
index 0000000000000..c107472128dbd
--- /dev/null
+++ b/clang/test/Modules/import-textual-skip-visible.c
@@ -0,0 +1,186 @@
+// Test that #import does NOT re-enter a textual header when the header was
+// included by a module that IS visible. The key invariant:
+//
+//   If a textual header was included by module M, and M is visible in the
+//   current TU (directly or via transitive export), then #import of that
+//   header MUST be skipped (the file is already included).
+//
+// This is the complement of import-textual-reenter-not-visible.c, which
+// verifies the opposite: non-visible modules do NOT suppress re-entry.
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+// Textual.h has no include guard. If #import incorrectly re-enters the file,
+// the duplicate typedef will produce a compilation error.
+//--- Textual.h
+typedef int textual_type_t;
+#define MACRO_TEXTUAL 1
+
+// =========================================================================
+// Module E: E1 includes E2, E2 includes Textual.h. E1 has export *.
+// =========================================================================
+//--- E/E1.h
+#include "E2.h"
+//--- E/E2.h
+#include "Textual.h"
+//--- E/module.modulemap
+module E {
+  module E1 {
+    header "E1.h"
+    export *
+  }
+  module E2 { header "E2.h" }
+}
+
+// =========================================================================
+// Module F: F1 imports E1 cross-module. F1 has export *.
+// =========================================================================
+//--- F/F1.h
+#include "E/E1.h"
+//--- F/module.modulemap
+module F {
+  module F1 {
+    header "F1.h"
+    export *
+  }
+}
+
+// =========================================================================
+// Module G: G1 imports F1 cross-module. G1 has export *.
+// =========================================================================
+//--- G/G1.h
+#include "F/F1.h"
+//--- G/module.modulemap
+module G {
+  module G1 {
+    header "G1.h"
+    export *
+  }
+}
+
+// =========================================================================
+// Module A: A1 independently includes Textual.h (for early deserialization).
+// =========================================================================
+//--- A/A1.h
+#include "Textual.h"
+//--- A/module.modulemap
+module A {
+  module A1 { header "A1.h" }
+}
+
+// =========================================================================
+// Test 1: Sibling re-export within a single module.
+// Import E1 (export *) → E2 becomes visible → Textual.h's macro should
+// be available as a module macro without an explicit #import.
+// =========================================================================
+//--- test_sibling_reexport.c
+#import "E/E1.h"
+#ifdef MACRO_TEXTUAL
+textual_type_t val = MACRO_TEXTUAL;
+#else
+#error "MACRO_TEXTUAL should be visible via E1's re-export of E2"
+#endif
+
+// =========================================================================
+// Test 2: Cross-module transitive re-export (depth 2).
+// Import F1 (export *) → E1 (export *) → E2 visible. Macro should be
+// available without explicit #import.
+// =========================================================================
+//--- test_cross_depth2.c
+#import "F/F1.h"
+#ifdef MACRO_TEXTUAL
+textual_type_t val = MACRO_TEXTUAL;
+#else
+#error "MACRO_TEXTUAL should be visible via F1 -> E1 -> E2 export chain"
+#endif
+
+// =========================================================================
+// Test 3: Cross-module transitive re-export (depth 3).
+// Import G1 (export *) → F1 (export *) → E1 (export *) → E2 visible.
+// =========================================================================
+//--- test_cross_depth3.c
+#import "G/G1.h"
+#ifdef MACRO_TEXTUAL
+textual_type_t val = MACRO_TEXTUAL;
+#else
+#error "MACRO_TEXTUAL should be visible via G1 -> F1 -> E1 -> E2 export chain"
+#endif
+
+// =========================================================================
+// Test 4: Explicit #import of already-visible textual header is suppressed.
+// Import E1 (export *) → E2 visible → Textual.h already included.
+// A subsequent #import Textual.h should be a no-op. The typedef in
+// Textual.h (no include guard) would cause a duplicate definition error
+// if the file were incorrectly re-entered.
+// =========================================================================
+//--- test_import_suppressed.c
+#import "E/E1.h"
+#import "Textual.h"
+textual_type_t val = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 5: Cross-module #import suppression (depth 2).
+// Import F1 (export * chain) → E2 visible. Explicit #import of Textual.h
+// must be suppressed.
+// =========================================================================
+//--- test_import_suppressed_depth2.c
+#import "F/F1.h"
+#import "Textual.h"
+textual_type_t val = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 6: Cross-module #import suppression (depth 3).
+// Import G1 (export * chain) → E2 visible. Explicit #import of Textual.h
+// must be suppressed.
+// =========================================================================
+//--- test_import_suppressed_depth3.c
+#import "G/G1.h"
+#import "Textual.h"
+textual_type_t val = MACRO_TEXTUAL;
+
+// =========================================================================
+// Test 7: Early deserialization — import a different module that also
+// includes Textual.h BEFORE importing E1. This forces Textual.h's header
+// info to be deserialized (via markIncludedInModule) at a time when E2 is
+// NOT yet visible. Then import E1 (export *) → E2 becomes visible.
+// The subsequent #import Textual.h must still be suppressed.
+// =========================================================================
+//--- test_early_deser.c
+#import "A/A1.h"
+#import "E/E1.h"
+#import "Textual.h"
+textual_type_t val = MACRO_TEXTUAL;
+
+// =========================================================================
+// Build PCMs.
+// =========================================================================
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/E/module.modulemap 
-fmodule-name=E -o %t/E.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/F/module.modulemap 
-fmodule-name=F -fmodule-file=%t/E.pcm -o %t/F.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/G/module.modulemap 
-fmodule-name=G -fmodule-file=%t/F.pcm -o %t/G.pcm
+// RUN: %clang_cc1 -fmodules -I %t -emit-module %t/A/module.modulemap 
-fmodule-name=A -o %t/A.pcm
+
+// =========================================================================
+// Run tests.
+// =========================================================================
+
+// Test 1: sibling re-export — macro visible without #import.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_sibling_reexport.c 
-fmodule-file=%t/E.pcm
+
+// Test 2: depth-2 cross-module re-export — macro visible.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_cross_depth2.c 
-fmodule-file=%t/F.pcm
+
+// Test 3: depth-3 cross-module re-export — macro visible.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_cross_depth3.c 
-fmodule-file=%t/G.pcm
+
+// Test 4: explicit #import suppressed (single module).
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_import_suppressed.c 
-fmodule-file=%t/E.pcm
+
+// Test 5: explicit #import suppressed (depth 2).
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only 
%t/test_import_suppressed_depth2.c -fmodule-file=%t/F.pcm
+
+// Test 6: explicit #import suppressed (depth 3).
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only 
%t/test_import_suppressed_depth3.c -fmodule-file=%t/G.pcm
+
+// Test 7: early deserialization — #import suppressed despite prior deser.
+// RUN: %clang_cc1 -fmodules -I %t -fsyntax-only %t/test_early_deser.c 
-fmodule-file=%t/A.pcm -fmodule-file=%t/E.pcm

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to