llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tidy @llvm/pr-subscribers-clang-tools-extra Author: Daniil Dudkin (unterumarmung) <details> <summary>Changes</summary> Add a `FragmentHeaders` option to `misc-include-cleaner`. Direct includes of matching headers are treated as fragments of the main file for usage scanning, while diagnostics remain anchored to the main file. Extend `include-cleaner::walkUsed` with a `FileID` predicate so callers can define main-file semantics (used by clang-tidy to include fragment files). Macro references remain main-file-only. Add tests for fragment behavior, generated-path matching, non-recursive includes, diagnostic anchoring, and macro uses. Update docs accordingly. --- Patch is 20.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180282.diff 6 Files Affected: - (modified) clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp (+106-7) - (modified) clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h (+5-1) - (modified) clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst (+24-1) - (modified) clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h (+8) - (modified) clang-tools-extra/include-cleaner/lib/Analysis.cpp (+14-1) - (modified) clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp (+174) ``````````diff diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp index 558c368901f1c..1d49efb64e518 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp @@ -32,10 +32,12 @@ #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem/UniqueID.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include <optional> @@ -51,6 +53,23 @@ struct MissingIncludeInfo { include_cleaner::SymbolReference SymRef; include_cleaner::Header Missing; }; + +llvm::SmallString<128> normalizePath(llvm::StringRef Path) { + namespace path = llvm::sys::path; + + llvm::SmallString<128> P = Path; + path::remove_dots(P, /*remove_dot_dot=*/true); + path::native(P, path::Style::posix); + while (!P.empty() && P.back() == '/') + P.pop_back(); + return P; +} + +bool matchesAnyRegex(llvm::ArrayRef<llvm::Regex> Regexes, + llvm::StringRef Path) { + return llvm::any_of(Regexes, + [&](const llvm::Regex &R) { return R.match(Path); }); +} } // namespace IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, @@ -61,6 +80,9 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, DeduplicateFindings(Options.get("DeduplicateFindings", true)), UnusedIncludes(Options.get("UnusedIncludes", true)), MissingIncludes(Options.get("MissingIncludes", true)) { + for (StringRef Pattern : + utils::options::parseStringList(Options.get("FragmentHeaders", ""))) + FragmentHeaderPatterns.push_back(Pattern.str()); for (const auto &Header : IgnoreHeaders) { if (!llvm::Regex{Header}.isValid()) configurationDiag("Invalid ignore headers regex '%0'") << Header; @@ -69,6 +91,12 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, HeaderSuffix += "$"; IgnoreHeadersRegex.emplace_back(HeaderSuffix); } + for (const auto &Pattern : FragmentHeaderPatterns) { + llvm::Regex CompiledRegex(Pattern); + if (!CompiledRegex.isValid()) + configurationDiag("Invalid fragment headers regex '%0'") << Pattern; + FragmentHeaderRegexes.push_back(std::move(CompiledRegex)); + } if (UnusedIncludes == false && MissingIncludes == false) this->configurationDiag("The check 'misc-include-cleaner' will not " @@ -79,6 +107,12 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, void IncludeCleanerCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "IgnoreHeaders", utils::options::serializeStringList(IgnoreHeaders)); + llvm::SmallVector<StringRef> FragmentHeaderRefs; + FragmentHeaderRefs.reserve(FragmentHeaderPatterns.size()); + for (const auto &Pattern : FragmentHeaderPatterns) + FragmentHeaderRefs.push_back(Pattern); + Options.store(Opts, "FragmentHeaders", + utils::options::serializeStringList(FragmentHeaderRefs)); Options.store(Opts, "DeduplicateFindings", DeduplicateFindings); Options.store(Opts, "UnusedIncludes", UnusedIncludes); Options.store(Opts, "MissingIncludes", MissingIncludes); @@ -123,20 +157,69 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { const FileEntry *MainFile = SM->getFileEntryForID(SM->getMainFileID()); llvm::DenseSet<const include_cleaner::Include *> Used; std::vector<MissingIncludeInfo> Missing; - llvm::SmallVector<Decl *> MainFileDecls; + // Include-cleaner normally limits analysis to main-file roots (see + // https://clangd.llvm.org/design/include-cleaner). Some generated headers + // (e.g. TableGen .inc/.def) are fragments of the main file. When configured, + // treat direct includes of such headers as part of the main file for usage + // scanning, while keeping diagnostics anchored to the main file. + // Use UniqueID for stable membership across FileEntry instances. + llvm::DenseSet<llvm::sys::fs::UniqueID> FragmentFiles; + if (!FragmentHeaderRegexes.empty()) { + for (const auto &Inc : RecordedPreprocessor.Includes.all()) { + if (!SM->isWrittenInMainFile(SM->getExpansionLoc(Inc.HashLocation))) + continue; + if (!Inc.Resolved) + continue; + llvm::SmallString<128> ResolvedPath = + normalizePath(Inc.Resolved->getName()); + bool IsFragment = matchesAnyRegex(FragmentHeaderRegexes, ResolvedPath); + if (!IsFragment) { + // Fall back to matching the spelled include for virtual paths. + llvm::SmallString<128> SpelledPath = normalizePath(Inc.Spelled); + if (!SpelledPath.empty()) + IsFragment = matchesAnyRegex(FragmentHeaderRegexes, SpelledPath); + } + if (IsFragment) + FragmentFiles.insert(Inc.Resolved->getUniqueID()); + } + } + auto IsAnalyzedDecl = [&](Decl *D) { + SourceLocation Loc = D->getLocation(); + if (Loc.isInvalid()) + return false; + SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); + FileID FID = SM->getFileID(ExpandedLoc); + if (FID == SM->getMainFileID()) + return true; + if (FragmentFiles.empty()) + return false; + if (auto FE = SM->getFileEntryRefForID(FID)) + return FragmentFiles.contains(FE->getUniqueID()); + return false; + }; + llvm::SmallVector<Decl *> RootDecls; for (Decl *D : Result.Nodes.getNodeAs<TranslationUnitDecl>("top")->decls()) { - if (!SM->isWrittenInMainFile(SM->getExpansionLoc(D->getLocation()))) + if (!IsAnalyzedDecl(D)) continue; // FIXME: Filter out implicit template specializations. - MainFileDecls.push_back(D); + RootDecls.push_back(D); } llvm::DenseSet<include_cleaner::Symbol> SeenSymbols; OptionalDirectoryEntryRef ResourceDir = PP->getHeaderSearchInfo().getModuleMap().getBuiltinDir(); + // include-cleaner filters uses to main/preamble; extend it to fragments. + auto IsUsageLocation = [&](FileID FID) { + if (FID == SM->getMainFileID() || FID == SM->getPreambleFileID()) + return true; + if (FragmentFiles.empty()) + return false; + if (auto FE = SM->getFileEntryRefForID(FID)) + return FragmentFiles.contains(FE->getUniqueID()); + return false; + }; // FIXME: Find a way to have less code duplication between include-cleaner // analysis implementation and the below code. - walkUsed(MainFileDecls, RecordedPreprocessor.MacroReferences, &RecordedPI, - *PP, + walkUsed(RootDecls, RecordedPreprocessor.MacroReferences, &RecordedPI, *PP, [&](const include_cleaner::SymbolReference &Ref, llvm::ArrayRef<include_cleaner::Header> Providers) { // Process each symbol once to reduce noise in the findings. @@ -171,7 +254,8 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { Ref.RT == include_cleaner::RefType::Explicit && !shouldIgnore(Providers.front())) Missing.push_back({Ref, Providers.front()}); - }); + }, + IsUsageLocation); std::vector<const include_cleaner::Include *> Unused; for (const include_cleaner::Include &I : @@ -224,6 +308,21 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { FileStyle->IncludeStyle); // Deduplicate insertions when running in bulk fix mode. llvm::StringSet<> InsertedHeaders{}; + auto diagLocation = [&](SourceLocation Loc) { + SourceLocation SpellingLoc = SM->getSpellingLoc(Loc); + if (SM->isWrittenInMainFile(SpellingLoc) || FragmentFiles.empty()) + return SpellingLoc; + SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); + FileID FID = SM->getFileID(ExpandedLoc); + if (auto FE = SM->getFileEntryRefForID(FID); + FE && FragmentFiles.contains(FE->getUniqueID())) { + // Map fragment diagnostics to the include site in the main file. + SourceLocation IncludeLoc = SM->getIncludeLoc(FID); + if (IncludeLoc.isValid()) + return IncludeLoc; + } + return SM->getLocForStartOfFile(SM->getMainFileID()); + }; for (const auto &Inc : Missing) { const std::string Spelling = include_cleaner::spellHeader( {Inc.Missing, PP->getHeaderSearchInfo(), MainFile}); @@ -236,7 +335,7 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { llvm::StringRef{Spelling}.trim("\"<>"), Angled, tooling::IncludeDirective::Include)) { const DiagnosticBuilder DB = - diag(SM->getSpellingLoc(Inc.SymRef.RefLocation), + diag(diagLocation(Inc.SymRef.RefLocation), "no header providing \"%0\" is directly included") << Inc.SymRef.Target.name(); if (areDiagsSelfContained() || diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h index 619d8191ab41f..c395815fee12b 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h @@ -20,12 +20,14 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/Regex.h" +#include <string> #include <vector> namespace clang::tidy::misc { /// Checks for unused and missing includes. Generates findings only for -/// the main file of a translation unit. +/// the main file of a translation unit, optionally treating some direct +/// includes as fragments of the main file for usage scanning. /// Findings correspond to https://clangd.llvm.org/design/include-cleaner. /// /// For the user-facing documentation see: @@ -45,6 +47,7 @@ class IncludeCleanerCheck : public ClangTidyCheck { include_cleaner::PragmaIncludes RecordedPI; const Preprocessor *PP = nullptr; std::vector<StringRef> IgnoreHeaders; + std::vector<std::string> FragmentHeaderPatterns; // Whether emit only one finding per usage of a symbol. const bool DeduplicateFindings; // Whether to report unused includes. @@ -52,6 +55,7 @@ class IncludeCleanerCheck : public ClangTidyCheck { // Whether to report missing includes. const bool MissingIncludes; llvm::SmallVector<llvm::Regex> IgnoreHeadersRegex; + std::vector<llvm::Regex> FragmentHeaderRegexes; bool shouldIgnore(const include_cleaner::Header &H); }; diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst index 4364610787058..b550a67866f99 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst @@ -4,7 +4,8 @@ misc-include-cleaner ==================== Checks for unused and missing includes. Generates findings only for -the main file of a translation unit. +the main file of a translation unit. Optionally, direct includes can be +treated as fragments of the main file for usage scanning. Findings correspond to https://clangd.llvm.org/design/include-cleaner. Example: @@ -34,6 +35,28 @@ Options insertion/removal for all headers under the directory `foo`. Default is an empty string, no headers will be ignored. +.. option:: FragmentHeaders + + A semicolon-separated list of regexes that match against normalized resolved + include paths (POSIX-style separators). Direct includes of the main file + that match are treated as fragments of the main file for usage scanning. + This is intended for non-self-contained include fragments such as TableGen + ``.inc``/``.def`` files or generated headers. Only direct includes are + considered; includes inside fragments are not treated as fragments. + + Diagnostics remain anchored to the main file, but symbol uses inside + fragments can keep prerequisite includes in the main file from being + removed or marked missing. Note that include-cleaner does not support + ``// IWYU pragma: associated``. + + Example configuration: + + .. code-block:: yaml + + CheckOptions: + - key: misc-include-cleaner.FragmentHeaders + value: '(^|.*/)(gen-out|generated)/|.*\.(inc|def)$' + .. option:: DeduplicateFindings A boolean that controls whether the check should deduplicate findings for the diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h index c3241763237d1..2589812c914c8 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -25,6 +25,7 @@ namespace clang { class SourceLocation; +class FileID; class SourceManager; class Decl; class FileEntry; @@ -60,6 +61,13 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB); +/// Overload that allows customizing which FileIDs are treated as "main file". +/// The predicate is evaluated on the spelling file of a reference. +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, + UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile); struct AnalysisResults { std::vector<const Include *> Unused; diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index e48a380211af0..52e94c545e81f 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -54,6 +54,18 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB) { const auto &SM = PP.getSourceManager(); + // Default behavior: only main file and preamble are considered for uses. + walkUsed(ASTRoots, MacroRefs, PI, PP, CB, [&](FileID FID) { + return FID == SM.getMainFileID() || FID == SM.getPreambleFileID(); + }); +} + +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, + UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile) { + const auto &SM = PP.getSourceManager(); // This is duplicated in writeHTMLReport, changes should be mirrored there. tooling::stdlib::Recognizer Recognizer; for (auto *Root : ASTRoots) { @@ -71,7 +83,8 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, SpellLoc = SM.getSpellingLoc(Loc); } auto FID = SM.getFileID(SpellLoc); - if (FID != SM.getMainFileID() && FID != SM.getPreambleFileID()) + // Allow callers to extend "main file" semantics (e.g. for fragments). + if (!IsMainFile(FID)) return; // FIXME: Most of the work done here is repetitive. It might be useful to // have a cache/batching. diff --git a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp index 00576916492e1..f428ba3e3591a 100644 --- a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp @@ -339,6 +339,180 @@ TEST(IncludeCleanerCheckTest, UnusedIncludes) { } } +TEST(IncludeCleanerCheckTest, FragmentHeadersPreserveIncludes) { + const std::string Fragment = "gen.inc"; + const std::string Code = llvm::formatv(R"( +#include <vector> +#include "{0}" + +Gen G; +)", + Fragment) + .str(); + const char *FragmentCode = R"( +struct Gen { + std::vector<int> Values; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + + { + std::vector<ClangTidyError> Errors; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, ClangTidyOptions(), + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "included header vector is not used directly"); + } + { + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ""; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + } + { + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(0U)); + } +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersGeneratedPaths) { + const std::string Fragment = appendPathFileSystemIndependent( + {"gen-out", "bin", "gen.inc"}); + const std::string Code = llvm::formatv(R"( +#include <vector> +#include "{0}" + +Gen G; +)", + Fragment) + .str(); + const char *FragmentCode = R"( +struct Gen { + std::vector<int> Values; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = + "(^|.*/)(gen-out|generated)/"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(0U)); +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersNonRecursive) { + const char *Code = R"( +#include <vector> +#include "gen.inc" + +int use = gen_value; +)"; + const char *GenCode = R"( +int gen_value = 0; +#include "sub.inc" +)"; + const char *SubCode = R"( +struct Gen { + std::vector<int> Values; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{"gen.inc", GenCode}, {"sub.inc", SubCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "included header vector is not used directly"); +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersMissingIncludeAnchored) { + llvm::Annotations Code(R"( +#include $diag^"gen.inc" +int use = gen_value; +)"); + const char *GenCode = R"( +#include "foo.h" +int gen_value = 0; +int missing = foo(); +)"; + const char *FooHeader = R"( +#pragma once +int foo(); +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code.code(), &Errors, "file.cpp", {}, Opts, + {{"gen.inc", GenCode}, {"foo.h", FooHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "no header providing \"foo\" is directly included"); + EXPECT_EQ(Errors.front().Message.FileOffset, Code.point("diag")); +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersMacroUseNotCounted) { + const char *Code = R"( +#include "macros.h" +#include "gen.inc" + +int use = x; +)"; + const char *GenCode = R"( +MAKE_INT(x) +)"; + const char *MacrosHeader = R"( +#pragma once +#define MAKE_INT(name) int name; +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHea... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/180282 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
