https://github.com/unterumarmung updated https://github.com/llvm/llvm-project/pull/180282
>From b107b2dbad873fe7f1a1d146d5ad0e5e84898daa Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Fri, 6 Feb 2026 01:40:50 +0300 Subject: [PATCH 1/4] clang-tidy: treat fragment includes as main-file roots Add a `FragmentHeaders` option to `misc-include-cleaner`. Direct includes of matching headers are treated as fragments of the main file for usage scanning, while diagnostics remain anchored to the main file. Extend `include-cleaner::walkUsed` with a `FileID` predicate so callers can define main-file semantics (used by clang-tidy to include fragment files). Macro references remain main-file-only. Add tests for fragment behavior, generated-path matching, non-recursive includes, diagnostic anchoring, and macro uses. Update docs accordingly. --- .../clang-tidy/misc/IncludeCleanerCheck.cpp | 182 ++++++++++++++---- .../clang-tidy/misc/IncludeCleanerCheck.h | 6 +- .../checks/misc/include-cleaner.rst | 25 ++- .../include/clang-include-cleaner/Analysis.h | 7 + .../include-cleaner/lib/Analysis.cpp | 14 +- .../clang-tidy/IncludeCleanerTest.cpp | 174 +++++++++++++++++ 6 files changed, 364 insertions(+), 44 deletions(-) diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp index 558c368901f1c..699cf7c5d52ac 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp @@ -32,10 +32,12 @@ #include "clang/Tooling/Inclusions/StandardLibrary.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem/UniqueID.h" #include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include <optional> @@ -51,6 +53,23 @@ struct MissingIncludeInfo { include_cleaner::SymbolReference SymRef; include_cleaner::Header Missing; }; + +llvm::SmallString<128> normalizePath(llvm::StringRef Path) { + namespace path = llvm::sys::path; + + llvm::SmallString<128> P = Path; + path::remove_dots(P, /*remove_dot_dot=*/true); + path::native(P, path::Style::posix); + while (!P.empty() && P.back() == '/') + P.pop_back(); + return P; +} + +bool matchesAnyRegex(llvm::ArrayRef<llvm::Regex> Regexes, + llvm::StringRef Path) { + return llvm::any_of(Regexes, + [&](const llvm::Regex &R) { return R.match(Path); }); +} } // namespace IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, @@ -61,6 +80,9 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, DeduplicateFindings(Options.get("DeduplicateFindings", true)), UnusedIncludes(Options.get("UnusedIncludes", true)), MissingIncludes(Options.get("MissingIncludes", true)) { + for (StringRef Pattern : + utils::options::parseStringList(Options.get("FragmentHeaders", ""))) + FragmentHeaderPatterns.push_back(Pattern.str()); for (const auto &Header : IgnoreHeaders) { if (!llvm::Regex{Header}.isValid()) configurationDiag("Invalid ignore headers regex '%0'") << Header; @@ -69,6 +91,12 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, HeaderSuffix += "$"; IgnoreHeadersRegex.emplace_back(HeaderSuffix); } + for (const auto &Pattern : FragmentHeaderPatterns) { + llvm::Regex CompiledRegex(Pattern); + if (!CompiledRegex.isValid()) + configurationDiag("Invalid fragment headers regex '%0'") << Pattern; + FragmentHeaderRegexes.push_back(std::move(CompiledRegex)); + } if (UnusedIncludes == false && MissingIncludes == false) this->configurationDiag("The check 'misc-include-cleaner' will not " @@ -79,6 +107,12 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, void IncludeCleanerCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "IgnoreHeaders", utils::options::serializeStringList(IgnoreHeaders)); + llvm::SmallVector<StringRef> FragmentHeaderRefs; + FragmentHeaderRefs.reserve(FragmentHeaderPatterns.size()); + for (const auto &Pattern : FragmentHeaderPatterns) + FragmentHeaderRefs.push_back(Pattern); + Options.store(Opts, "FragmentHeaders", + utils::options::serializeStringList(FragmentHeaderRefs)); Options.store(Opts, "DeduplicateFindings", DeduplicateFindings); Options.store(Opts, "UnusedIncludes", UnusedIncludes); Options.store(Opts, "MissingIncludes", MissingIncludes); @@ -123,55 +157,106 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { const FileEntry *MainFile = SM->getFileEntryForID(SM->getMainFileID()); llvm::DenseSet<const include_cleaner::Include *> Used; std::vector<MissingIncludeInfo> Missing; - llvm::SmallVector<Decl *> MainFileDecls; + // Include-cleaner normally limits analysis to main-file roots (see + // https://clangd.llvm.org/design/include-cleaner). Some generated headers + // (e.g. TableGen .inc/.def) are fragments of the main file. When configured, + // treat direct includes of such headers as part of the main file for usage + // scanning, while keeping diagnostics anchored to the main file. + // Use UniqueID for stable membership across FileEntry instances. + llvm::DenseSet<llvm::sys::fs::UniqueID> FragmentFiles; + if (!FragmentHeaderRegexes.empty()) { + for (const auto &Inc : RecordedPreprocessor.Includes.all()) { + if (!SM->isWrittenInMainFile(SM->getExpansionLoc(Inc.HashLocation))) + continue; + if (!Inc.Resolved) + continue; + llvm::SmallString<128> ResolvedPath = + normalizePath(Inc.Resolved->getName()); + bool IsFragment = matchesAnyRegex(FragmentHeaderRegexes, ResolvedPath); + if (!IsFragment) { + // Fall back to matching the spelled include for virtual paths. + llvm::SmallString<128> SpelledPath = normalizePath(Inc.Spelled); + if (!SpelledPath.empty()) + IsFragment = matchesAnyRegex(FragmentHeaderRegexes, SpelledPath); + } + if (IsFragment) + FragmentFiles.insert(Inc.Resolved->getUniqueID()); + } + } + auto IsAnalyzedDecl = [&](Decl *D) { + SourceLocation Loc = D->getLocation(); + if (Loc.isInvalid()) + return false; + SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); + FileID FID = SM->getFileID(ExpandedLoc); + if (FID == SM->getMainFileID()) + return true; + if (FragmentFiles.empty()) + return false; + if (auto FE = SM->getFileEntryRefForID(FID)) + return FragmentFiles.contains(FE->getUniqueID()); + return false; + }; + llvm::SmallVector<Decl *> RootDecls; for (Decl *D : Result.Nodes.getNodeAs<TranslationUnitDecl>("top")->decls()) { - if (!SM->isWrittenInMainFile(SM->getExpansionLoc(D->getLocation()))) + if (!IsAnalyzedDecl(D)) continue; // FIXME: Filter out implicit template specializations. - MainFileDecls.push_back(D); + RootDecls.push_back(D); } llvm::DenseSet<include_cleaner::Symbol> SeenSymbols; OptionalDirectoryEntryRef ResourceDir = PP->getHeaderSearchInfo().getModuleMap().getBuiltinDir(); + // include-cleaner filters uses to main/preamble; extend it to fragments. + auto IsUsageLocation = [&](FileID FID) { + if (FID == SM->getMainFileID() || FID == SM->getPreambleFileID()) + return true; + if (FragmentFiles.empty()) + return false; + if (auto FE = SM->getFileEntryRefForID(FID)) + return FragmentFiles.contains(FE->getUniqueID()); + return false; + }; // FIXME: Find a way to have less code duplication between include-cleaner // analysis implementation and the below code. - walkUsed(MainFileDecls, RecordedPreprocessor.MacroReferences, &RecordedPI, - *PP, - [&](const include_cleaner::SymbolReference &Ref, - llvm::ArrayRef<include_cleaner::Header> Providers) { - // Process each symbol once to reduce noise in the findings. - // Tidy checks are used in two different workflows: - // - Ones that show all the findings for a given file. For such - // workflows there is not much point in showing all the occurences, - // as one is enough to indicate the issue. - // - Ones that show only the findings on changed pieces. For such - // workflows it's useful to show findings on every reference of a - // symbol as otherwise tools might give incosistent results - // depending on the parts of the file being edited. But it should - // still help surface findings for "new violations" (i.e. - // dependency did not exist in the code at all before). - if (DeduplicateFindings && !SeenSymbols.insert(Ref.Target).second) - return; - bool Satisfied = false; - for (const include_cleaner::Header &H : Providers) { - if (H.kind() == include_cleaner::Header::Physical && - (H.physical() == MainFile || - H.physical().getDir() == ResourceDir)) { - Satisfied = true; - continue; - } - - for (const include_cleaner::Include *I : - RecordedPreprocessor.Includes.match(H)) { - Used.insert(I); - Satisfied = true; - } - } - if (!Satisfied && !Providers.empty() && - Ref.RT == include_cleaner::RefType::Explicit && - !shouldIgnore(Providers.front())) - Missing.push_back({Ref, Providers.front()}); - }); + walkUsed( + RootDecls, RecordedPreprocessor.MacroReferences, &RecordedPI, *PP, + [&](const include_cleaner::SymbolReference &Ref, + llvm::ArrayRef<include_cleaner::Header> Providers) { + // Process each symbol once to reduce noise in the findings. + // Tidy checks are used in two different workflows: + // - Ones that show all the findings for a given file. For such + // workflows there is not much point in showing all the occurences, + // as one is enough to indicate the issue. + // - Ones that show only the findings on changed pieces. For such + // workflows it's useful to show findings on every reference of a + // symbol as otherwise tools might give incosistent results + // depending on the parts of the file being edited. But it should + // still help surface findings for "new violations" (i.e. + // dependency did not exist in the code at all before). + if (DeduplicateFindings && !SeenSymbols.insert(Ref.Target).second) + return; + bool Satisfied = false; + for (const include_cleaner::Header &H : Providers) { + if (H.kind() == include_cleaner::Header::Physical && + (H.physical() == MainFile || + H.physical().getDir() == ResourceDir)) { + Satisfied = true; + continue; + } + + for (const include_cleaner::Include *I : + RecordedPreprocessor.Includes.match(H)) { + Used.insert(I); + Satisfied = true; + } + } + if (!Satisfied && !Providers.empty() && + Ref.RT == include_cleaner::RefType::Explicit && + !shouldIgnore(Providers.front())) + Missing.push_back({Ref, Providers.front()}); + }, + IsUsageLocation); std::vector<const include_cleaner::Include *> Unused; for (const include_cleaner::Include &I : @@ -224,6 +309,21 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { FileStyle->IncludeStyle); // Deduplicate insertions when running in bulk fix mode. llvm::StringSet<> InsertedHeaders{}; + auto diagLocation = [&](SourceLocation Loc) { + SourceLocation SpellingLoc = SM->getSpellingLoc(Loc); + if (SM->isWrittenInMainFile(SpellingLoc) || FragmentFiles.empty()) + return SpellingLoc; + SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); + FileID FID = SM->getFileID(ExpandedLoc); + if (auto FE = SM->getFileEntryRefForID(FID); + FE && FragmentFiles.contains(FE->getUniqueID())) { + // Map fragment diagnostics to the include site in the main file. + SourceLocation IncludeLoc = SM->getIncludeLoc(FID); + if (IncludeLoc.isValid()) + return IncludeLoc; + } + return SM->getLocForStartOfFile(SM->getMainFileID()); + }; for (const auto &Inc : Missing) { const std::string Spelling = include_cleaner::spellHeader( {Inc.Missing, PP->getHeaderSearchInfo(), MainFile}); @@ -236,7 +336,7 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { llvm::StringRef{Spelling}.trim("\"<>"), Angled, tooling::IncludeDirective::Include)) { const DiagnosticBuilder DB = - diag(SM->getSpellingLoc(Inc.SymRef.RefLocation), + diag(diagLocation(Inc.SymRef.RefLocation), "no header providing \"%0\" is directly included") << Inc.SymRef.Target.name(); if (areDiagsSelfContained() || diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h index 619d8191ab41f..c395815fee12b 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h @@ -20,12 +20,14 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/Regex.h" +#include <string> #include <vector> namespace clang::tidy::misc { /// Checks for unused and missing includes. Generates findings only for -/// the main file of a translation unit. +/// the main file of a translation unit, optionally treating some direct +/// includes as fragments of the main file for usage scanning. /// Findings correspond to https://clangd.llvm.org/design/include-cleaner. /// /// For the user-facing documentation see: @@ -45,6 +47,7 @@ class IncludeCleanerCheck : public ClangTidyCheck { include_cleaner::PragmaIncludes RecordedPI; const Preprocessor *PP = nullptr; std::vector<StringRef> IgnoreHeaders; + std::vector<std::string> FragmentHeaderPatterns; // Whether emit only one finding per usage of a symbol. const bool DeduplicateFindings; // Whether to report unused includes. @@ -52,6 +55,7 @@ class IncludeCleanerCheck : public ClangTidyCheck { // Whether to report missing includes. const bool MissingIncludes; llvm::SmallVector<llvm::Regex> IgnoreHeadersRegex; + std::vector<llvm::Regex> FragmentHeaderRegexes; bool shouldIgnore(const include_cleaner::Header &H); }; diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst index 4364610787058..b550a67866f99 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst @@ -4,7 +4,8 @@ misc-include-cleaner ==================== Checks for unused and missing includes. Generates findings only for -the main file of a translation unit. +the main file of a translation unit. Optionally, direct includes can be +treated as fragments of the main file for usage scanning. Findings correspond to https://clangd.llvm.org/design/include-cleaner. Example: @@ -34,6 +35,28 @@ Options insertion/removal for all headers under the directory `foo`. Default is an empty string, no headers will be ignored. +.. option:: FragmentHeaders + + A semicolon-separated list of regexes that match against normalized resolved + include paths (POSIX-style separators). Direct includes of the main file + that match are treated as fragments of the main file for usage scanning. + This is intended for non-self-contained include fragments such as TableGen + ``.inc``/``.def`` files or generated headers. Only direct includes are + considered; includes inside fragments are not treated as fragments. + + Diagnostics remain anchored to the main file, but symbol uses inside + fragments can keep prerequisite includes in the main file from being + removed or marked missing. Note that include-cleaner does not support + ``// IWYU pragma: associated``. + + Example configuration: + + .. code-block:: yaml + + CheckOptions: + - key: misc-include-cleaner.FragmentHeaders + value: '(^|.*/)(gen-out|generated)/|.*\.(inc|def)$' + .. option:: DeduplicateFindings A boolean that controls whether the check should deduplicate findings for the diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h index c3241763237d1..65b79fe454011 100644 --- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h +++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h @@ -25,6 +25,7 @@ namespace clang { class SourceLocation; +class FileID; class SourceManager; class Decl; class FileEntry; @@ -60,6 +61,12 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, llvm::ArrayRef<SymbolReference> MacroRefs, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB); +/// Overload that allows customizing which FileIDs are treated as "main file". +/// The predicate is evaluated on the spelling file of a reference. +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile); struct AnalysisResults { std::vector<const Include *> Unused; diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index e48a380211af0..fde11f1e3490f 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -54,6 +54,17 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB) { const auto &SM = PP.getSourceManager(); + // Default behavior: only main file and preamble are considered for uses. + walkUsed(ASTRoots, MacroRefs, PI, PP, CB, [&](FileID FID) { + return FID == SM.getMainFileID() || FID == SM.getPreambleFileID(); + }); +} + +void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, + llvm::ArrayRef<SymbolReference> MacroRefs, + const PragmaIncludes *PI, const Preprocessor &PP, UsedSymbolCB CB, + llvm::function_ref<bool(FileID)> IsMainFile) { + const auto &SM = PP.getSourceManager(); // This is duplicated in writeHTMLReport, changes should be mirrored there. tooling::stdlib::Recognizer Recognizer; for (auto *Root : ASTRoots) { @@ -71,7 +82,8 @@ void walkUsed(llvm::ArrayRef<Decl *> ASTRoots, SpellLoc = SM.getSpellingLoc(Loc); } auto FID = SM.getFileID(SpellLoc); - if (FID != SM.getMainFileID() && FID != SM.getPreambleFileID()) + // Allow callers to extend "main file" semantics (e.g. for fragments). + if (!IsMainFile(FID)) return; // FIXME: Most of the work done here is repetitive. It might be useful to // have a cache/batching. diff --git a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp index 00576916492e1..19c187cbc7ace 100644 --- a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp @@ -339,6 +339,180 @@ TEST(IncludeCleanerCheckTest, UnusedIncludes) { } } +TEST(IncludeCleanerCheckTest, FragmentHeadersPreserveIncludes) { + const std::string Fragment = "gen.inc"; + const std::string Code = llvm::formatv(R"( +#include <vector> +#include "{0}" + +Gen G; +)", + Fragment) + .str(); + const char *FragmentCode = R"( +struct Gen { + std::vector<int> Values; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + + { + std::vector<ClangTidyError> Errors; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, ClangTidyOptions(), + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "included header vector is not used directly"); + } + { + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ""; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + } + { + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(0U)); + } +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersGeneratedPaths) { + const std::string Fragment = + appendPathFileSystemIndependent({"gen-out", "bin", "gen.inc"}); + const std::string Code = llvm::formatv(R"( +#include <vector> +#include "{0}" + +Gen G; +)", + Fragment) + .str(); + const char *FragmentCode = R"( +struct Gen { + std::vector<int> Values; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = + "(^|.*/)(gen-out|generated)/"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{Fragment, FragmentCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(0U)); +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersNonRecursive) { + const char *Code = R"( +#include <vector> +#include "gen.inc" + +int use = gen_value; +)"; + const char *GenCode = R"( +int gen_value = 0; +#include "sub.inc" +)"; + const char *SubCode = R"( +struct Gen { + std::vector<int> Values; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{"gen.inc", GenCode}, {"sub.inc", SubCode}, {"vector", VectorHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "included header vector is not used directly"); +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersMissingIncludeAnchored) { + llvm::Annotations Code(R"( +#include $diag^"gen.inc" +int use = gen_value; +)"); + const char *GenCode = R"( +#include "foo.h" +int gen_value = 0; +int missing = foo(); +)"; + const char *FooHeader = R"( +#pragma once +int foo(); +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code.code(), &Errors, "file.cpp", {}, Opts, + {{"gen.inc", GenCode}, {"foo.h", FooHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "no header providing \"foo\" is directly included"); + EXPECT_EQ(Errors.front().Message.FileOffset, Code.point("diag")); +} + +TEST(IncludeCleanerCheckTest, FragmentHeadersMacroUseNotCounted) { + const char *Code = R"( +#include "macros.h" +#include "gen.inc" + +int use = x; +)"; + const char *GenCode = R"( +MAKE_INT(x) +)"; + const char *MacrosHeader = R"( +#pragma once +#define MAKE_INT(name) int name; +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$"; + runCheckOnCode<IncludeCleanerCheck>( + Code, &Errors, "file.cpp", {}, Opts, + {{"gen.inc", GenCode}, {"macros.h", MacrosHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(1U)); + EXPECT_EQ(Errors.front().Message.Message, + "included header macros.h is not used directly"); +} + TEST(IncludeCleanerCheckTest, MissingIncludes) { const char *PreCode = R"( #include "baz.h" // IWYU pragma: keep >From d15f8b6ff6d29bac37fbb0c5d2d24026c6f4c2d2 Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Sat, 7 Feb 2026 00:23:01 +0300 Subject: [PATCH 2/4] fix clang-tidy --- .../clang-tidy/misc/IncludeCleanerCheck.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp index 699cf7c5d52ac..93f411f643cf2 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.cpp @@ -53,8 +53,9 @@ struct MissingIncludeInfo { include_cleaner::SymbolReference SymRef; include_cleaner::Header Missing; }; +} // namespace -llvm::SmallString<128> normalizePath(llvm::StringRef Path) { +static llvm::SmallString<128> normalizePath(llvm::StringRef Path) { namespace path = llvm::sys::path; llvm::SmallString<128> P = Path; @@ -65,12 +66,11 @@ llvm::SmallString<128> normalizePath(llvm::StringRef Path) { return P; } -bool matchesAnyRegex(llvm::ArrayRef<llvm::Regex> Regexes, - llvm::StringRef Path) { +static bool matchesAnyRegex(llvm::ArrayRef<llvm::Regex> Regexes, + llvm::StringRef Path) { return llvm::any_of(Regexes, [&](const llvm::Regex &R) { return R.match(Path); }); } -} // namespace IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, ClangTidyContext *Context) @@ -80,7 +80,7 @@ IncludeCleanerCheck::IncludeCleanerCheck(StringRef Name, DeduplicateFindings(Options.get("DeduplicateFindings", true)), UnusedIncludes(Options.get("UnusedIncludes", true)), MissingIncludes(Options.get("MissingIncludes", true)) { - for (StringRef Pattern : + for (const StringRef Pattern : utils::options::parseStringList(Options.get("FragmentHeaders", ""))) FragmentHeaderPatterns.push_back(Pattern.str()); for (const auto &Header : IgnoreHeaders) { @@ -170,12 +170,12 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { continue; if (!Inc.Resolved) continue; - llvm::SmallString<128> ResolvedPath = + const llvm::SmallString<128> ResolvedPath = normalizePath(Inc.Resolved->getName()); bool IsFragment = matchesAnyRegex(FragmentHeaderRegexes, ResolvedPath); if (!IsFragment) { // Fall back to matching the spelled include for virtual paths. - llvm::SmallString<128> SpelledPath = normalizePath(Inc.Spelled); + const llvm::SmallString<128> SpelledPath = normalizePath(Inc.Spelled); if (!SpelledPath.empty()) IsFragment = matchesAnyRegex(FragmentHeaderRegexes, SpelledPath); } @@ -184,11 +184,11 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { } } auto IsAnalyzedDecl = [&](Decl *D) { - SourceLocation Loc = D->getLocation(); + const SourceLocation Loc = D->getLocation(); if (Loc.isInvalid()) return false; - SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); - FileID FID = SM->getFileID(ExpandedLoc); + const SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); + const FileID FID = SM->getFileID(ExpandedLoc); if (FID == SM->getMainFileID()) return true; if (FragmentFiles.empty()) @@ -309,12 +309,12 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { FileStyle->IncludeStyle); // Deduplicate insertions when running in bulk fix mode. llvm::StringSet<> InsertedHeaders{}; - auto diagLocation = [&](SourceLocation Loc) { + auto DiagLocation = [&](SourceLocation Loc) { SourceLocation SpellingLoc = SM->getSpellingLoc(Loc); if (SM->isWrittenInMainFile(SpellingLoc) || FragmentFiles.empty()) return SpellingLoc; - SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); - FileID FID = SM->getFileID(ExpandedLoc); + const SourceLocation ExpandedLoc = SM->getExpansionLoc(Loc); + const FileID FID = SM->getFileID(ExpandedLoc); if (auto FE = SM->getFileEntryRefForID(FID); FE && FragmentFiles.contains(FE->getUniqueID())) { // Map fragment diagnostics to the include site in the main file. @@ -336,7 +336,7 @@ void IncludeCleanerCheck::check(const MatchFinder::MatchResult &Result) { llvm::StringRef{Spelling}.trim("\"<>"), Angled, tooling::IncludeDirective::Include)) { const DiagnosticBuilder DB = - diag(diagLocation(Inc.SymRef.RefLocation), + diag(DiagLocation(Inc.SymRef.RefLocation), "no header providing \"%0\" is directly included") << Inc.SymRef.Target.name(); if (areDiagsSelfContained() || >From bf3862705b5d9595a5a3967e8af24a5501e67d2e Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Sat, 7 Feb 2026 15:48:28 +0300 Subject: [PATCH 3/4] Fix review comments --- .../clang-tidy/misc/IncludeCleanerCheck.h | 5 +-- clang-tools-extra/docs/ReleaseNotes.rst | 7 +++ .../checks/misc/include-cleaner.rst | 15 ++++--- .../clang-tidy/IncludeCleanerTest.cpp | 45 +++++++++++++++++++ 4 files changed, 62 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h index c395815fee12b..407ff1e92d3fd 100644 --- a/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h +++ b/clang-tools-extra/clang-tidy/misc/IncludeCleanerCheck.h @@ -20,7 +20,6 @@ #include "clang/Lex/HeaderSearch.h" #include "clang/Lex/Preprocessor.h" #include "llvm/Support/Regex.h" -#include <string> #include <vector> namespace clang::tidy::misc { @@ -47,7 +46,7 @@ class IncludeCleanerCheck : public ClangTidyCheck { include_cleaner::PragmaIncludes RecordedPI; const Preprocessor *PP = nullptr; std::vector<StringRef> IgnoreHeaders; - std::vector<std::string> FragmentHeaderPatterns; + llvm::SmallVector<std::string> FragmentHeaderPatterns; // Whether emit only one finding per usage of a symbol. const bool DeduplicateFindings; // Whether to report unused includes. @@ -55,7 +54,7 @@ class IncludeCleanerCheck : public ClangTidyCheck { // Whether to report missing includes. const bool MissingIncludes; llvm::SmallVector<llvm::Regex> IgnoreHeadersRegex; - std::vector<llvm::Regex> FragmentHeaderRegexes; + llvm::SmallVector<llvm::Regex> FragmentHeaderRegexes; bool shouldIgnore(const include_cleaner::Header &H); }; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 0ad69f5fdc5aa..5f1b9c9c58d72 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -175,6 +175,13 @@ Changes in existing checks - Added support for analyzing function parameters with the `AnalyzeParameters` option. +- Improved :doc:`misc-include-cleaner + <clang-tidy/checks/misc/include-cleaner>` check: + + - Added the `FragmentHeaders` option to treat direct includes matching + regular expressions as fragments of the main file for usage scanning. This + preserves main-file includes required only by those fragments. + - Improved :doc:`modernize-use-std-format <clang-tidy/checks/modernize/use-std-format>` check by fixing a crash when an argument is part of a macro expansion. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst index b550a67866f99..645bb47d71a8b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/include-cleaner.rst @@ -37,12 +37,13 @@ Options .. option:: FragmentHeaders - A semicolon-separated list of regexes that match against normalized resolved - include paths (POSIX-style separators). Direct includes of the main file - that match are treated as fragments of the main file for usage scanning. - This is intended for non-self-contained include fragments such as TableGen - ``.inc``/``.def`` files or generated headers. Only direct includes are - considered; includes inside fragments are not treated as fragments. + A semicolon-separated list of regular expressions that match against + normalized resolved include paths (POSIX-style separators). Direct includes + of the main file that match are treated as fragments of the main file for + usage scanning. This is intended for non-self-contained include fragments + such as TableGen ``.inc``/``.def`` files or generated headers. Only direct + includes are considered; includes inside fragments are not treated as + fragments. Diagnostics remain anchored to the main file, but symbol uses inside fragments can keep prerequisite includes in the main file from being @@ -55,7 +56,7 @@ Options CheckOptions: - key: misc-include-cleaner.FragmentHeaders - value: '(^|.*/)(gen-out|generated)/|.*\.(inc|def)$' + value: 'gen-out/;generated/;\\.(inc|def)$' .. option:: DeduplicateFindings diff --git a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp index 19c187cbc7ace..66571068f32ef 100644 --- a/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/IncludeCleanerTest.cpp @@ -425,6 +425,51 @@ struct vector {}; ASSERT_THAT(Errors.size(), testing::Eq(0U)); } +TEST(IncludeCleanerCheckTest, FragmentHeadersMultiplePatterns) { + const char *Code = R"( +#include <vector> +#include <string> +#include "gen.inc" +#include "gen.def" + +IncGen Inc; +DefGen Def; +)"; + const char *IncCode = R"( +struct IncGen { + std::vector<int> Values; +}; +)"; + const char *DefCode = R"( +struct DefGen { + std::string Name; +}; +)"; + const char *VectorHeader = R"( +#pragma once +namespace std { +template <typename T> +struct vector {}; +} // namespace std +)"; + const char *StringHeader = R"( +#pragma once +namespace std { +struct string {}; +} // namespace std +)"; + + std::vector<ClangTidyError> Errors; + ClangTidyOptions Opts; + Opts.CheckOptions["test-check-0.FragmentHeaders"] = ".*\\.inc$;.*\\.def$"; + runCheckOnCode<IncludeCleanerCheck>(Code, &Errors, "file.cpp", {}, Opts, + {{"gen.inc", IncCode}, + {"gen.def", DefCode}, + {"vector", VectorHeader}, + {"string", StringHeader}}); + ASSERT_THAT(Errors.size(), testing::Eq(0U)); +} + TEST(IncludeCleanerCheckTest, FragmentHeadersNonRecursive) { const char *Code = R"( #include <vector> >From b93fe99a2fa0a7b44ee72c8bb95e3517fc19c65b Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Sat, 7 Feb 2026 18:43:03 +0300 Subject: [PATCH 4/4] fix release notes --- clang-tools-extra/docs/ReleaseNotes.rst | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 5f1b9c9c58d72..8e04695327bc6 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -176,11 +176,10 @@ Changes in existing checks option. - Improved :doc:`misc-include-cleaner - <clang-tidy/checks/misc/include-cleaner>` check: - - - Added the `FragmentHeaders` option to treat direct includes matching - regular expressions as fragments of the main file for usage scanning. This - preserves main-file includes required only by those fragments. + <clang-tidy/checks/misc/include-cleaner>` check by adding the + `FragmentHeaders` option to treat direct includes matching regular + expressions as fragments of the main file for usage scanning, preserving + main-file includes required only by those fragments. - Improved :doc:`modernize-use-std-format <clang-tidy/checks/modernize/use-std-format>` check by fixing a crash _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
