https://github.com/ian-twilightcoder created 
https://github.com/llvm/llvm-project/pull/83660

Once a file has been `#import`'ed, it gets stamped as if it was `#pragma once` 
and will not be re-entered, even on #include. This means that any errant 
#import of a file designed to be included multiple times, such as <assert.h>, 
will incorrectly mark it as include-once and break the multiple include 
functionality. Normally this isn't a big problem, e.g. <assert.h> can't have 
its NDEBUG mode changed after the first #import, but it is still mostly 
functional. However, when clang modules are involved, this can cause the header 
to be hidden entirely.

Objective-C code most often uses #import for everything, because it's required 
for most Objective-C headers to prevent double inclusion and redeclaration 
errors. (It's rare for Objective-C headers to use macro guards or `#pragma 
once`.) The problem arises when a submodule includes a multiple-include header. 
The "already included" state is global across all modules (which is necessary 
so that non-modular headers don't get compiled into multiple translation units 
and cause redeclaration errors). If another module or the main file #import's 
the same header, it becomes invisible from then on. If the original submodule 
is not imported, the include of the header will effectively do nothing and the 
header will be invisible. The only way to actually get the header's 
declarations is to somehow figure out which submodule consumed the header, and 
import that instead. That's basically impossible since it depends on exactly 
which modules were built in which order.

#import is a poor indicator of whether a header is actually include-once, as 
the #import is external to the header it applies to, and requires that all 
inclusions correctly and consistently use #import vs #include. When modules are 
enabled, consider a header marked `textual` in its module as a stronger 
indicator of multiple-include than #import's indication of include-once. This 
will allow headers like <assert.h> to always be included when modules are 
enabled, even if #import is erroneously used somewhere.

>From 171d0e299dd676ce29583e16fdf8c3e6f3dd7925 Mon Sep 17 00:00:00 2001
From: Ian Anderson <i...@apple.com>
Date: Fri, 1 Mar 2024 22:17:09 -0800
Subject: [PATCH] [clang][modules] Headers meant to be included multiple times
 can be completely invisible in clang module builds

Once a file has been `#import`'ed, it gets stamped as if it was `#pragma once` 
and will not be re-entered, even on #include. This means that any errant 
#import of a file designed to be included multiple times, such as <assert.h>, 
will incorrectly mark it as include-once and break the multiple include 
functionality. Normally this isn't a big problem, e.g. <assert.h> can't have 
its NDEBUG mode changed after the first #import, but it is still mostly 
functional. However, when clang modules are involved, this can cause the header 
to be hidden entirely.

Objective-C code most often uses #import for everything, because it's required 
for most Objective-C headers to prevent double inclusion and redeclaration 
errors. (It's rare for Objective-C headers to use macro guards or `#pragma 
once`.) The problem arises when a submodule includes a multiple-include header. 
The "already included" state is global across all modules (which is necessary 
so that non-modular headers don't get compiled into multiple translation units 
and cause redeclaration errors). If another module or the main file #import's 
the same header, it becomes invisible from then on. If the original submodule 
is not imported, the include of the header will effectively do nothing and the 
header will be invisible. The only way to actually get the header's 
declarations is to somehow figure out which submodule consumed the header, and 
import that instead. That's basically impossible since it depends on exactly 
which modules were built in which order.

#import is a poor indicator of whether a header is actually include-once, as 
the #import is external to the header it applies to, and requires that all 
inclusions correctly and consistently use #import vs #include. When modules are 
enabled, consider a header marked `textual` in its module as a stronger 
indicator of multiple-include than #import's indication of include-once. This 
will allow headers like <assert.h> to always be included when modules are 
enabled, even if #import is erroneously used somewhere.
---
 clang/include/clang/Lex/HeaderSearch.h       |  24 ++-
 clang/lib/Lex/HeaderSearch.cpp               | 156 ++++++++++++-------
 clang/lib/Serialization/ASTReader.cpp        |   2 +-
 clang/test/Modules/builtin-import.mm         |   2 +
 clang/test/Modules/import-textual-noguard.mm |   6 +-
 clang/test/Modules/import-textual.mm         |   2 +
 clang/test/Modules/multiple-import.m         |  43 +++++
 7 files changed, 172 insertions(+), 63 deletions(-)
 create mode 100644 clang/test/Modules/multiple-import.m

diff --git a/clang/include/clang/Lex/HeaderSearch.h 
b/clang/include/clang/Lex/HeaderSearch.h
index 705dcfa8aacc3f..bf8981b94d1b57 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -78,11 +78,17 @@ struct HeaderFileInfo {
   LLVM_PREFERRED_TYPE(bool)
   unsigned External : 1;
 
-  /// Whether this header is part of a module.
+  /// Whether this header is part of and built with a module.
   LLVM_PREFERRED_TYPE(bool)
   unsigned isModuleHeader : 1;
 
-  /// Whether this header is part of the module that we are building.
+  /// Whether this header is a textual header in a module (isModuleHeader will
+  /// be false)
+  LLVM_PREFERRED_TYPE(bool)
+  unsigned isTextualModuleHeader : 1;
+
+  /// Whether this header is part of and built with the module that we are
+  /// building.
   LLVM_PREFERRED_TYPE(bool)
   unsigned isCompilingModuleHeader : 1;
 
@@ -128,13 +134,20 @@ struct HeaderFileInfo {
 
   HeaderFileInfo()
       : isImport(false), isPragmaOnce(false), DirInfo(SrcMgr::C_User),
-        External(false), isModuleHeader(false), isCompilingModuleHeader(false),
-        Resolved(false), IndexHeaderMapHeader(false), IsValid(false)  {}
+        External(false), isModuleHeader(false), isTextualModuleHeader(false),
+        isCompilingModuleHeader(false), Resolved(false),
+        IndexHeaderMapHeader(false), IsValid(false) {}
 
   /// Retrieve the controlling macro for this header file, if
   /// any.
   const IdentifierInfo *
   getControllingMacro(ExternalPreprocessorSource *External);
+
+  /// Update the module membership bits based on the header role.
+  ///
+  /// isModuleHeader will potentially be set, but not cleared.
+  /// isTextualModuleHeader will be set or cleared based on the role update.
+  void mergeModuleMembership(ModuleMap::ModuleHeaderRole Role);
 };
 
 /// An external source of header file information, which may supply
@@ -522,6 +535,9 @@ class HeaderSearch {
   ///
   /// \return false if \#including the file will have no effect or true
   /// if we should include it.
+  ///
+  /// \param M The module to which `File` belongs (this should usually be the
+  /// SuggestedModule returned by LookupFile/LookupSubframeworkHeader)
   bool ShouldEnterIncludeFile(Preprocessor &PP, FileEntryRef File,
                               bool isImport, bool ModulesEnabled, Module *M,
                               bool &IsFirstIncludeOfFile);
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcc2b56df166b8..fb6ec456dd277e 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -1307,6 +1307,23 @@ OptionalFileEntryRef 
HeaderSearch::LookupSubframeworkHeader(
 // File Info Management.
 
//===----------------------------------------------------------------------===//
 
+static void mergeHeaderFileInfoModuleBits(HeaderFileInfo &HFI,
+                                          bool isModuleHeader,
+                                          bool isTextualModuleHeader) {
+  assert((!isModuleHeader || !isTextualModuleHeader) &&
+         "A header can't build with a module and be textual at the same time");
+  HFI.isModuleHeader |= isModuleHeader;
+  if (HFI.isModuleHeader)
+    HFI.isTextualModuleHeader = false;
+  else
+    HFI.isTextualModuleHeader |= isTextualModuleHeader;
+}
+
+void HeaderFileInfo::mergeModuleMembership(ModuleMap::ModuleHeaderRole Role) {
+  mergeHeaderFileInfoModuleBits(*this, ModuleMap::isModular(Role),
+                                (Role & ModuleMap::TextualHeader));
+}
+
 /// Merge the header file info provided by \p OtherHFI into the current
 /// header file info (\p HFI)
 static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
@@ -1315,7 +1332,8 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
 
   HFI.isImport |= OtherHFI.isImport;
   HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
-  HFI.isModuleHeader |= OtherHFI.isModuleHeader;
+  mergeHeaderFileInfoModuleBits(HFI, OtherHFI.isModuleHeader,
+                                OtherHFI.isTextualModuleHeader);
 
   if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
     HFI.ControllingMacro = OtherHFI.ControllingMacro;
@@ -1403,11 +1421,9 @@ bool 
HeaderSearch::isFileMultipleIncludeGuarded(FileEntryRef File) const {
 void HeaderSearch::MarkFileModuleHeader(FileEntryRef FE,
                                         ModuleMap::ModuleHeaderRole Role,
                                         bool isCompilingModuleHeader) {
-  bool isModularHeader = ModuleMap::isModular(Role);
-
   // Don't mark the file info as non-external if there's nothing to change.
   if (!isCompilingModuleHeader) {
-    if (!isModularHeader)
+    if ((Role & ModuleMap::ExcludedHeader))
       return;
     auto *HFI = getExistingFileInfo(FE);
     if (HFI && HFI->isModuleHeader)
@@ -1415,82 +1431,109 @@ void HeaderSearch::MarkFileModuleHeader(FileEntryRef 
FE,
   }
 
   auto &HFI = getFileInfo(FE);
-  HFI.isModuleHeader |= isModularHeader;
-  HFI.isCompilingModuleHeader |= isCompilingModuleHeader;
+  HFI.mergeModuleMembership(Role);
+  HFI.isCompilingModuleHeader |= (HFI.isModuleHeader && 
isCompilingModuleHeader);
 }
 
 bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
                                           FileEntryRef File, bool isImport,
                                           bool ModulesEnabled, Module *M,
                                           bool &IsFirstIncludeOfFile) {
-  ++NumIncluded; // Count # of attempted #includes.
-
+  // An include file should be entered if either:
+  // 1. This is the first include of the file.
+  // 2. This file can be included multiple times, that is it's not an
+  //    "include-once" file.
+  //
+  // Include-once is controlled by these preprocessor directives.
+  //
+  // #pragma once
+  // This directive is in the include file, and marks it as an include-once
+  // file.
+  //
+  // #import <file>
+  // This directive is in the includer, and indicates that the include file
+  // should only be entered if this is the first include.
+  ++NumIncluded;
   IsFirstIncludeOfFile = false;
-
-  // Get information about this file.
   HeaderFileInfo &FileInfo = getFileInfo(File);
 
-  // FIXME: this is a workaround for the lack of proper modules-aware support
-  // for #import / #pragma once
-  auto TryEnterImported = [&]() -> bool {
+  auto MaybeReenterImportedFile = [&]() -> bool {
     if (!ModulesEnabled)
       return false;
+    // Modules add a wrinkle though: what's included isn't necessarily visible.
+    // Consider this module.
+    // module Example {
+    //   module A { header "a.h" export * }
+    //   module B { header "b.h" export * }
+    // }
+    // b.h includes c.h. The main file includes a.h, which will trigger a 
module
+    // build of Example, and c.h will be included. However, c.h isn't visible 
to
+    // the main file. Normally this is fine, the main file can just include c.h
+    // if it needs it. If c.h is in a module, the include will translate into a
+    // module import, this function will be skipped, and everything will work 
as
+    // expected. However, if c.h is not in a module (or is `textual`), then 
this
+    // function will run. If c.h is include-once, it will not be entered from
+    // the main file and it will still not be visible.
+
     // Ensure FileInfo bits are up to date.
     ModMap.resolveHeaderDirectives(File);
-    // Modules with builtins are special; multiple modules use builtins as
-    // modular headers, example:
-    //
-    //    module stddef { header "stddef.h" export * }
-    //
-    // After module map parsing, this expands to:
-    //
-    //    module stddef {
-    //      header "/path_to_builtin_dirs/stddef.h"
-    //      textual "stddef.h"
-    //    }
+
+    // This brings up a subtlety of #import - it's not a very good indicator of
+    // include-once. Developers are often unaware of the difference between
+    // #include and #import, and tend to use one or the other indiscrimiately.
+    // In order to support #include on include-once headers that lack macro
+    // guards and `#pragma once` (which is the vast majority of Objective-C
+    // headers), if a file is ever included with #import, it's marked as
+    // isImport in the HeaderFileInfo and treated as include-once. This allows
+    // #include to work in Objective-C.
+    // #include <Foundation/Foundation.h>
+    // #include <Foundation/NSString.h>
+    // Foundation.h has an #import of NSString.h, and so the second #include is
+    // skipped even though NSString.h has no `#pragma once` and no macro guard.
     //
-    // It's common that libc++ and system modules will both define such
-    // submodules. Make sure cached results for a builtin header won't
-    // prevent other builtin modules from potentially entering the builtin
-    // header. Note that builtins are header guarded and the decision to
-    // actually enter them is postponed to the controlling macros logic below.
-    bool TryEnterHdr = false;
-    if (FileInfo.isCompilingModuleHeader && FileInfo.isModuleHeader)
-      TryEnterHdr = ModMap.isBuiltinHeader(File);
-
-    // Textual headers can be #imported from different modules. Since ObjC
-    // headers find in the wild might rely only on #import and do not contain
-    // controlling macros, be conservative and only try to enter textual 
headers
-    // if such macro is present.
-    if (!FileInfo.isModuleHeader &&
-        FileInfo.getControllingMacro(ExternalLookup))
-      TryEnterHdr = true;
-    return TryEnterHdr;
+    // However, this helpfulness causes problems with modules. If c.h is not an
+    // include-once file, but something included it with #import anyway (as is
+    // typical in Objective-C code), this include will be skipped and c.h will
+    // not be visible. If the file is not `#pragma once`, consider it not
+    // include-once if it is a `textual` header in a module.
+    return !FileInfo.isPragmaOnce && FileInfo.isTextualModuleHeader;
+    // If the include file has a macro guard, then it might still not be
+    // re-entered if the controlling macro is visibly defined. e.g. another
+    // header in the module being built included this file and local submodule
+    // visibility is not enabled.
+
+    // It might be tempting to re-enter the include-once file if it's not
+    // visible in an attempt to make it visible. However this will still cause
+    // redeclaration errors against the known-but-not-visible declarations. The
+    // include file not being visible will most likely cause "undefined x"
+    // errors, but at least there's a slim chance of compilation succeeding.
   };
 
-  // If this is a #import directive, check that we have not already imported
-  // this header.
   if (isImport) {
-    // If this has already been imported, don't import it again.
+    // As discussed above, record that this file was ever `#import`ed, and 
treat
+    // it as an include-once file from here out.
     FileInfo.isImport = true;
-
-    // Has this already been #import'ed or #include'd?
-    if (PP.alreadyIncluded(File) && !TryEnterImported())
+    if (PP.alreadyIncluded(File) && !MaybeReenterImportedFile())
       return false;
   } else {
-    // Otherwise, if this is a #include of a file that was previously #import'd
-    // or if this is the second #include of a #pragma once file, ignore it.
-    if ((FileInfo.isPragmaOnce || FileInfo.isImport) && !TryEnterImported())
+    // isPragmaOnce and isImport are only set after the file has been included
+    // at least once. If either are set then this is a repeat #include of an
+    // include-once file.
+    if (FileInfo.isPragmaOnce ||
+        (FileInfo.isImport && !MaybeReenterImportedFile()))
       return false;
   }
 
-  // Next, check to see if the file is wrapped with #ifndef guards.  If so, and
-  // if the macro that guards it is defined, we know the #include has no 
effect.
-  if (const IdentifierInfo *ControllingMacro
-      = FileInfo.getControllingMacro(ExternalLookup)) {
+  // As a final optimization, check for a macro guard and skip entering the 
file
+  // if the controlling macro is defined. The macro guard will effectively 
erase
+  // the file's contents, and the include would have no effect other than to
+  // waste time opening and reading a file.
+  if (const IdentifierInfo *ControllingMacro =
+          FileInfo.getControllingMacro(ExternalLookup)) {
     // If the header corresponds to a module, check whether the macro is 
already
-    // defined in that module rather than checking in the current set of 
visible
-    // modules.
+    // defined in that module rather than checking all visible modules. This is
+    // mainly to cover corner cases where the same controlling macro is used in
+    // different files in multiple modules.
     if (M ? PP.isMacroDefinedInLocalModule(ControllingMacro, M)
           : PP.isMacroDefined(ControllingMacro)) {
       ++NumMultiIncludeFileOptzn;
@@ -1499,7 +1542,6 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor 
&PP,
   }
 
   IsFirstIncludeOfFile = PP.markIncluded(File);
-
   return true;
 }
 
diff --git a/clang/lib/Serialization/ASTReader.cpp 
b/clang/lib/Serialization/ASTReader.cpp
index 683a076e6bc399..862618eb9ed7f3 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -2094,7 +2094,7 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const 
unsigned char *d,
       Module::Header H = {std::string(key.Filename), "", *FE};
       ModMap.addHeader(Mod, H, HeaderRole, /*Imported=*/true);
     }
-    HFI.isModuleHeader |= ModuleMap::isModular(HeaderRole);
+    HFI.mergeModuleMembership(HeaderRole);
   }
 
   // This HeaderFileInfo was externally loaded.
diff --git a/clang/test/Modules/builtin-import.mm 
b/clang/test/Modules/builtin-import.mm
index 8a27cb358484c9..52db9c15803ce5 100644
--- a/clang/test/Modules/builtin-import.mm
+++ b/clang/test/Modules/builtin-import.mm
@@ -1,4 +1,6 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -nobuiltininc -nostdinc++ -isysroot 
%S/Inputs/libc-libcxx/sysroot -isystem 
%S/Inputs/libc-libcxx/sysroot/usr/include/c++/v1 -isystem 
%S/Inputs/libc-libcxx/sysroot/usr/include -std=c++11 -fmodules 
-fimplicit-module-maps -fmodules-cache-path=%t -x objective-c++ %s
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -nobuiltininc -nostdinc++ -isysroot 
%S/Inputs/libc-libcxx/sysroot -isystem 
%S/Inputs/libc-libcxx/sysroot/usr/include/c++/v1 -isystem 
%S/Inputs/libc-libcxx/sysroot/usr/include -std=c++11 -fmodules 
-fimplicit-module-maps -fmodules-cache-path=%t -x objective-c++ 
-fmodules-local-submodule-visibility %s
 
 #include <stdio.h>
diff --git a/clang/test/Modules/import-textual-noguard.mm 
b/clang/test/Modules/import-textual-noguard.mm
index dd124b6609d000..ffc506117f519d 100644
--- a/clang/test/Modules/import-textual-noguard.mm
+++ b/clang/test/Modules/import-textual-noguard.mm
@@ -1,7 +1,11 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps 
-I%S/Inputs/import-textual/M2 -fmodules-cache-path=%t -x objective-c++ %s 
-verify
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps 
-I%S/Inputs/import-textual/M2 -fmodules-cache-path=%t -x objective-c++ 
-fmodules-local-submodule-visibility %s -verify
 
-#include "A/A.h" // expected-error {{could not build module 'M'}}
+// expected-no-diagnostics
+
+#include "A/A.h"
 #include "B/B.h"
 
 typedef aint xxx;
diff --git a/clang/test/Modules/import-textual.mm 
b/clang/test/Modules/import-textual.mm
index 6593239d7fd709..94a6aa448cdc2a 100644
--- a/clang/test/Modules/import-textual.mm
+++ b/clang/test/Modules/import-textual.mm
@@ -1,4 +1,6 @@
 // RUN: rm -rf %t
+// RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps 
-I%S/Inputs/import-textual/M -fmodules-cache-path=%t -x objective-c++ %s -verify
+// RUN: rm -rf %t
 // RUN: %clang_cc1 -fsyntax-only -std=c++11 -fmodules -fimplicit-module-maps 
-I%S/Inputs/import-textual/M -fmodules-cache-path=%t -x objective-c++ 
-fmodules-local-submodule-visibility %s -verify
 
 // expected-no-diagnostics
diff --git a/clang/test/Modules/multiple-import.m 
b/clang/test/Modules/multiple-import.m
new file mode 100644
index 00000000000000..24b0f50cd3433c
--- /dev/null
+++ b/clang/test/Modules/multiple-import.m
@@ -0,0 +1,43 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: %clang_cc1 -fmodules-cache-path=%t/no-lsv -fmodules 
-fimplicit-module-maps -I%t %t/multiple-imports.m -verify
+// RUN: %clang_cc1 -fmodules-cache-path=%t/lsv -fmodules 
-fimplicit-module-maps -fmodules-local-submodule-visibility -I%t 
%t/multiple-imports.m -verify
+
+//--- multiple-imports.m
+// expected-no-diagnostics
+#import <one.h>
+#import <assert.h>
+void test(void) {
+  assert(0);
+}
+
+//--- module.modulemap
+module Submodules [system] {
+  module one {
+    header "one.h"
+    export *
+  }
+  module two {
+    header "two.h"
+    export *
+  }
+}
+
+module libc [system] {
+  textual header "assert.h"
+}
+
+//--- one.h
+#ifndef one_h
+#define one_h
+#endif
+
+//--- two.h
+#ifndef two_h
+#define two_h
+#include <assert.h>
+#endif
+
+//--- assert.h
+#undef assert
+#define assert(expression) ((void)0)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to