Author: Sam McCall
Date: 2024-06-19T15:29:05+02:00
New Revision: 5574a5894fdb7f9a46a4fbe6c8970fd39890dc9b

URL: 
https://github.com/llvm/llvm-project/commit/5574a5894fdb7f9a46a4fbe6c8970fd39890dc9b
DIFF: 
https://github.com/llvm/llvm-project/commit/5574a5894fdb7f9a46a4fbe6c8970fd39890dc9b.diff

LOG: [include-cleaner] don't consider the associated header unused (#67228)


Loosely, the "associated header" of `foo.cpp` is `foo.h`.
It should be included, many styles include it first.

So far we haven't special cased it in any way, and require this include
to be justified. e.g. if foo.cpp defines a function declared in foo.h,
then the #include is allowed to check these declarations match.

However this doesn't really align with what users want:
- people reasonably want to include the associated header for the
  side-effect of validating that it compiles.
  In the degenerate case, `lib.cpp`is just `#include "lib.h"` (see bug)
- That `void foo(){}` IWYU-uses `void foo();` is a bit artificial, and
  most users won't internalize this. Instead they'll stick with the
  simpler model "include the header that defines your API".
  In the rare cases where these give different answers[1], our current
  behavior is a puzzling special case from the user POV.
  It is more user-friendly to accept both models.
- even where this diagnostic is a true positive (defs don't match header
  decls) the diagnostic does not communicate this usefully.

Fixes https://github.com/llvm/llvm-project/issues/67140

[1] Example of an associated header that's not IWYU-used:
```
// http.h
inline URL buildHttpURL(string host, int port, string path) {
  return "http://"; + host + ":" + port + "/" + path;
}
// http.cpp
class HTTPURLHandler : URLHandler { ... };
REGISTER_URL_HANDLER("http", HTTPURLHandler);
```

Added: 
    

Modified: 
    clang-tools-extra/include-cleaner/lib/Record.cpp
    clang-tools-extra/include-cleaner/unittests/RecordTest.cpp

Removed: 
    


################################################################################
diff  --git a/clang-tools-extra/include-cleaner/lib/Record.cpp 
b/clang-tools-extra/include-cleaner/lib/Record.cpp
index 78a4df6cc40ea..6b5be956ec108 100644
--- a/clang-tools-extra/include-cleaner/lib/Record.cpp
+++ b/clang-tools-extra/include-cleaner/lib/Record.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem/UniqueID.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/StringSaver.h"
 #include <algorithm>
 #include <assert.h>
@@ -180,7 +181,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, 
public CommentHandler {
   RecordPragma(const Preprocessor &P, PragmaIncludes *Out)
       : SM(P.getSourceManager()), HeaderInfo(P.getHeaderSearchInfo()), 
Out(Out),
         Arena(std::make_shared<llvm::BumpPtrAllocator>()),
-        UniqueStrings(*Arena) {}
+        UniqueStrings(*Arena),
+        MainFileStem(llvm::sys::path::stem(
+            SM.getNonBuiltinFilenameForID(SM.getMainFileID()).value_or(""))) {}
 
   void FileChanged(SourceLocation Loc, FileChangeReason Reason,
                    SrcMgr::CharacteristicKind FileType,
@@ -228,8 +231,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, 
public CommentHandler {
       }
     if (!IncludedHeader && File)
       IncludedHeader = *File;
-    checkForExport(HashFID, HashLine, std::move(IncludedHeader), File);
+    checkForExport(HashFID, HashLine, IncludedHeader, File);
     checkForKeep(HashLine, File);
+    checkForDeducedAssociated(IncludedHeader);
   }
 
   void checkForExport(FileID IncludingFile, int HashLine,
@@ -269,6 +273,27 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, 
public CommentHandler {
       KeepStack.pop_back(); // Pop immediately for single-line keep pragma.
   }
 
+  // Consider marking H as the "associated header" of the main file.
+  //
+  // Our heuristic:
+  // - it must be the first #include in the main file
+  // - it must have the same name stem as the main file (foo.h and foo.cpp)
+  // (IWYU pragma: associated is also supported, just not by this function).
+  //
+  // We consider the associated header as if it had a keep pragma.
+  // (Unlike IWYU, we don't treat #includes inside the associated header as if
+  // they were written in the main file.)
+  void checkForDeducedAssociated(std::optional<Header> H) {
+    namespace path = llvm::sys::path;
+    if (!InMainFile || SeenAssociatedCandidate)
+      return;
+    SeenAssociatedCandidate = true; // Only the first #include is our 
candidate.
+    if (!H || H->kind() != Header::Physical)
+      return;
+    if (path::stem(H->physical().getName(), path::Style::posix) == 
MainFileStem)
+      Out->ShouldKeep.insert(H->physical().getUniqueID());
+  }
+
   bool HandleComment(Preprocessor &PP, SourceRange Range) override {
     auto &SM = PP.getSourceManager();
     auto Pragma =
@@ -280,7 +305,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, 
public CommentHandler {
     int CommentLine = SM.getLineNumber(CommentFID, CommentOffset);
 
     if (InMainFile) {
-      if (Pragma->starts_with("keep")) {
+      if (Pragma->starts_with("keep") ||
+          // Limited support for associated headers: never consider unused.
+          Pragma->starts_with("associated")) {
         KeepStack.push_back({CommentLine, false});
       } else if (Pragma->starts_with("begin_keep")) {
         KeepStack.push_back({CommentLine, true});
@@ -342,6 +369,9 @@ class PragmaIncludes::RecordPragma : public PPCallbacks, 
public CommentHandler {
   std::shared_ptr<llvm::BumpPtrAllocator> Arena;
   /// Intern table for strings. Contents are on the arena.
   llvm::StringSaver UniqueStrings;
+  // Used when deducing associated header.
+  llvm::StringRef MainFileStem;
+  bool SeenAssociatedCandidate = false;
 
   struct ExportPragma {
     // The line number where we saw the begin_exports or export pragma.

diff  --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp 
b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
index d1f7590b22268..1a5996e5df284 100644
--- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp
@@ -316,7 +316,10 @@ class PragmaIncludeTest : public ::testing::Test {
     };
   }
 
-  TestAST build() { return TestAST(Inputs); }
+  TestAST build(bool ResetPragmaIncludes = true) {
+    if (ResetPragmaIncludes) PI = PragmaIncludes();
+    return TestAST(Inputs);
+  }
 
   void createEmptyFiles(llvm::ArrayRef<StringRef> FileNames) {
     for (llvm::StringRef File : FileNames)
@@ -379,6 +382,56 @@ TEST_F(PragmaIncludeTest, IWYUKeep) {
   EXPECT_TRUE(PI.shouldKeep(FM.getFile("std/set").get()));
 }
 
+TEST_F(PragmaIncludeTest, AssociatedHeader) {
+  createEmptyFiles({"foo/main.h", "bar/main.h", "bar/other.h", "std/vector"});
+  auto IsKeep = [&](llvm::StringRef Name, TestAST &AST) {
+    return PI.shouldKeep(AST.fileManager().getFile(Name).get());
+  };
+
+  Inputs.FileName = "main.cc";
+  Inputs.ExtraArgs.push_back("-isystemstd");
+  {
+    Inputs.Code = R"cpp(
+      #include "foo/main.h"
+      #include "bar/main.h"
+    )cpp";
+    auto AST = build();
+    EXPECT_TRUE(IsKeep("foo/main.h", AST));
+    EXPECT_FALSE(IsKeep("bar/main.h", AST)) << "not first include";
+  }
+
+  {
+    Inputs.Code = R"cpp(
+      #include "bar/other.h"
+      #include "bar/main.h"
+    )cpp";
+    auto AST = build();
+    EXPECT_FALSE(IsKeep("bar/other.h", AST));
+    EXPECT_FALSE(IsKeep("bar/main.h", AST)) << "not first include";
+  }
+
+  {
+    Inputs.Code = R"cpp(
+      #include "foo/main.h"
+      #include "bar/other.h" // IWYU pragma: associated
+      #include <vector> // IWYU pragma: associated
+    )cpp";
+    auto AST = build();
+    EXPECT_TRUE(IsKeep("foo/main.h", AST));
+    EXPECT_TRUE(IsKeep("bar/other.h", AST));
+    EXPECT_TRUE(IsKeep("std/vector", AST));
+  }
+
+  Inputs.FileName = "vector.cc";
+  {
+    Inputs.Code = R"cpp(
+      #include <vector>
+    )cpp";
+    auto AST = build();
+    EXPECT_FALSE(IsKeep("std/vector", AST)) << "stdlib is not associated";
+  }
+}
+
 TEST_F(PragmaIncludeTest, IWYUPrivate) {
   Inputs.Code = R"cpp(
     #include "public.h"
@@ -577,7 +630,7 @@ TEST_F(PragmaIncludeTest, OutlivesFMAndSM) {
   Inputs.MakeAction = nullptr; // Don't populate PI anymore.
 
   // Now this build gives us a new File&Source Manager.
-  TestAST Processed = build();
+  TestAST Processed = build(/*ResetPragmaIncludes=*/false);
   auto &FM = Processed.fileManager();
   auto PrivateFE = FM.getFile("private.h");
   assert(PrivateFE);
@@ -610,7 +663,7 @@ TEST_F(PragmaIncludeTest, CanRecordManyTimes) {
   // any IWYU pragmas. Make sure strings from previous recordings are still
   // alive.
   Inputs.Code = "";
-  build();
+  build(/*ResetPragmaIncludes=*/false);
   EXPECT_EQ(Public, "\"public.h\"");
 }
 } // namespace


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to