https://github.com/localspook created https://github.com/llvm/llvm-project/pull/174237
Right now, the deduplication algorithm runs in O(n²) time, because whenever it finds a duplicate warning, it removes it using `std::vector::erase` (which is itself O(n)). This starts taking up a noticeable amount of time as you start getting a lot of warnings. For example, running all checks over `clang/lib/Sema/Sema.cpp` and the headers it includes: ```sh time ./build/release/bin/clang-tidy -p build/debug --checks=* clang/lib/Sema/Sema.cpp -header-filter=.* > /dev/null ``` ...takes 2m28s on my system before this change and 2m6s after. Now, this scenario *is* a bit artificial; I imagine runs with so many warnings are uncommon in practice. On the other hand, the change is quite small, so... thoughts? >From d4b84182ffb5346df7a7d78d2df43a0c54f6d205 Mon Sep 17 00:00:00 2001 From: Victor Chernyakin <[email protected]> Date: Fri, 2 Jan 2026 07:49:33 -0800 Subject: [PATCH] [clang-tidy] Speed up deduplicating warnings from alias checks --- .../ClangTidyDiagnosticConsumer.cpp | 45 +++++++++---------- 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp index 0355eccc397e5..5718b3f602da9 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp @@ -783,38 +783,33 @@ std::vector<ClangTidyError> ClangTidyDiagnosticConsumer::take() { return std::move(Errors); } -namespace { -struct LessClangTidyErrorWithoutDiagnosticName { - bool operator()(const ClangTidyError *LHS, const ClangTidyError *RHS) const { - const tooling::DiagnosticMessage &M1 = LHS->Message; - const tooling::DiagnosticMessage &M2 = RHS->Message; - - return std::tie(M1.FilePath, M1.FileOffset, M1.Message) < - std::tie(M2.FilePath, M2.FileOffset, M2.Message); - } -}; -} // end anonymous namespace - void ClangTidyDiagnosticConsumer::removeDuplicatedDiagnosticsOfAliasCheckers() { - using UniqueErrorSet = - std::set<ClangTidyError *, LessClangTidyErrorWithoutDiagnosticName>; - UniqueErrorSet UniqueErrors; + if (Errors.size() <= 1) + return; + + static constexpr auto Projection = [](const ClangTidyError &E) { + const tooling::DiagnosticMessage &M = E.Message; + return std::tie(M.FilePath, M.FileOffset, M.Message); + }; - auto IT = Errors.begin(); - while (IT != Errors.end()) { - ClangTidyError &Error = *IT; - const std::pair<UniqueErrorSet::iterator, bool> Inserted = - UniqueErrors.insert(&Error); + // This orders any duplicates into consecutive runs. + llvm::stable_sort(Errors, + [](const ClangTidyError &LHS, const ClangTidyError &RHS) { + return Projection(LHS) < Projection(RHS); + }); + auto LastUniqueError = Errors.begin(); + for (ClangTidyError &Error : llvm::drop_begin(Errors, 1)) { + ClangTidyError &ExistingError = *LastUniqueError; // Unique error, we keep it and move along. - if (Inserted.second) { - ++IT; + if (Projection(Error) != Projection(ExistingError)) { + ++LastUniqueError; + *LastUniqueError = std::move(Error); } else { - ClangTidyError &ExistingError = **Inserted.first; const llvm::StringMap<tooling::Replacements> &CandidateFix = Error.Message.Fix; const llvm::StringMap<tooling::Replacements> &ExistingFix = - (*Inserted.first)->Message.Fix; + ExistingError.Message.Fix; if (CandidateFix != ExistingFix) { // In case of a conflict, don't suggest any fix-it. @@ -833,7 +828,7 @@ void ClangTidyDiagnosticConsumer::removeDuplicatedDiagnosticsOfAliasCheckers() { // Since it is the same error, we should take it as alias and remove it. ExistingError.EnabledDiagnosticAliases.emplace_back(Error.DiagnosticName); - IT = Errors.erase(IT); } } + Errors.erase(LastUniqueError + 1, Errors.end()); } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
