Could the underlying API be made more robust to handle StringRefs rather than null terminated char* in the first place? (like SourceManager::getCharacterData could return StringRef? Presumably at some layer it already knows how long the file is - the underlying MemoryBuffer, in this case)
On Thu, Oct 6, 2022 at 2:39 AM Sam McCall via cfe-commits <cfe-commits@lists.llvm.org> wrote: > > > Author: Sam McCall > Date: 2022-10-06T11:38:55+02:00 > New Revision: 5d2d527c32da2081b814ef8b446bc3e037f74b0a > > URL: > https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a > DIFF: > https://github.com/llvm/llvm-project/commit/5d2d527c32da2081b814ef8b446bc3e037f74b0a.diff > > LOG: [clangd] Avoid scanning up to end of file on each comment! > > Assigning char* (pointing at comment start) to StringRef was causing us > to scan the rest of the source file looking for the null terminator. > > This seems to be eating about 8% of our *total* CPU! > > While fixing this, factor out the common bits from the two places we're > parsing IWYU pragmas. > > Differential Revision: https://reviews.llvm.org/D135314 > > Added: > > > Modified: > clang-tools-extra/clangd/Headers.cpp > clang-tools-extra/clangd/Headers.h > clang-tools-extra/clangd/index/CanonicalIncludes.cpp > clang-tools-extra/clangd/unittests/HeadersTests.cpp > > Removed: > > > > ################################################################################ > diff --git a/clang-tools-extra/clangd/Headers.cpp > b/clang-tools-extra/clangd/Headers.cpp > index 5231a47487bc7..52b954e921620 100644 > --- a/clang-tools-extra/clangd/Headers.cpp > +++ b/clang-tools-extra/clangd/Headers.cpp > @@ -22,9 +22,17 @@ > namespace clang { > namespace clangd { > > -const char IWYUPragmaKeep[] = "// IWYU pragma: keep"; > -const char IWYUPragmaExport[] = "// IWYU pragma: export"; > -const char IWYUPragmaBeginExports[] = "// IWYU pragma: begin_exports"; > +llvm::Optional<StringRef> parseIWYUPragma(const char *Text) { > + // This gets called for every comment seen in the preamble, so it's quite > hot. > + constexpr llvm::StringLiteral IWYUPragma = "// IWYU pragma: "; > + if (strncmp(Text, IWYUPragma.data(), IWYUPragma.size())) > + return llvm::None; > + Text += IWYUPragma.size(); > + const char *End = Text; > + while (*End != 0 && *End != '\n') > + ++End; > + return StringRef(Text, End - Text); > +} > > class IncludeStructure::RecordHeaders : public PPCallbacks, > public CommentHandler { > @@ -129,10 +137,10 @@ class IncludeStructure::RecordHeaders : public > PPCallbacks, > } > > bool HandleComment(Preprocessor &PP, SourceRange Range) override { > - bool Err = false; > - llvm::StringRef Text = SM.getCharacterData(Range.getBegin(), &Err); > - if (Err) > + auto Pragma = parseIWYUPragma(SM.getCharacterData(Range.getBegin())); > + if (!Pragma) > return false; > + > if (inMainFile()) { > // Given: > // > @@ -150,8 +158,7 @@ class IncludeStructure::RecordHeaders : public > PPCallbacks, > // will know that the next inclusion is behind the IWYU pragma. > // FIXME: Support "IWYU pragma: begin_exports" and "IWYU pragma: > // end_exports". > - if (!Text.startswith(IWYUPragmaExport) && > - !Text.startswith(IWYUPragmaKeep)) > + if (!Pragma->startswith("export") && !Pragma->startswith("keep")) > return false; > unsigned Offset = SM.getFileOffset(Range.getBegin()); > LastPragmaKeepInMainFileLine = > @@ -161,8 +168,7 @@ class IncludeStructure::RecordHeaders : public > PPCallbacks, > // does not support them properly yet, so they will be not marked as > // unused. > // FIXME: Once IncludeCleaner supports export pragmas, remove this. > - if (!Text.startswith(IWYUPragmaExport) && > - !Text.startswith(IWYUPragmaBeginExports)) > + if (!Pragma->startswith("export") && > !Pragma->startswith("begin_exports")) > return false; > Out->HasIWYUExport.insert( > *Out->getID(SM.getFileEntryForID(SM.getFileID(Range.getBegin())))); > > diff --git a/clang-tools-extra/clangd/Headers.h > b/clang-tools-extra/clangd/Headers.h > index ff3f063168325..ba72ad397bf8f 100644 > --- a/clang-tools-extra/clangd/Headers.h > +++ b/clang-tools-extra/clangd/Headers.h > @@ -35,6 +35,12 @@ namespace clangd { > /// Returns true if \p Include is literal include like "path" or <path>. > bool isLiteralInclude(llvm::StringRef Include); > > +/// If Text begins an Include-What-You-Use directive, returns it. > +/// Given "// IWYU pragma: keep", returns "keep". > +/// Input is a null-terminated char* as provided by SM.getCharacterData(). > +/// (This should not be StringRef as we do *not* want to scan for its > length). > +llvm::Optional<StringRef> parseIWYUPragma(const char *Text); > + > /// Represents a header file to be #include'd. > struct HeaderFile { > std::string File; > > diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp > b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp > index b0ae42a96ecf4..8568079ca1adb 100644 > --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp > +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp > @@ -17,8 +17,6 @@ > namespace clang { > namespace clangd { > namespace { > -const char IWYUPragma[] = "// IWYU pragma: private, include "; > - > const std::pair<llvm::StringRef, llvm::StringRef> IncludeMappings[] = { > {"include/__stddef_max_align_t.h", "<cstddef>"}, > {"include/__wmmintrin_aes.h", "<wmmintrin.h>"}, > @@ -712,17 +710,17 @@ collectIWYUHeaderMaps(CanonicalIncludes *Includes) { > PragmaCommentHandler(CanonicalIncludes *Includes) : Includes(Includes) {} > > bool HandleComment(Preprocessor &PP, SourceRange Range) override { > - llvm::StringRef Text = > - Lexer::getSourceText(CharSourceRange::getCharRange(Range), > - PP.getSourceManager(), PP.getLangOpts()); > - if (!Text.consume_front(IWYUPragma)) > + auto Pragma = parseIWYUPragma( > + PP.getSourceManager().getCharacterData(Range.getBegin())); > + if (!Pragma || !Pragma->consume_front("private, include ")) > return false; > auto &SM = PP.getSourceManager(); > // We always insert using the spelling from the pragma. > if (auto *FE = SM.getFileEntryForID(SM.getFileID(Range.getBegin()))) > - Includes->addMapping( > - FE->getLastRef(), > - isLiteralInclude(Text) ? Text.str() : ("\"" + Text + > "\"").str()); > + Includes->addMapping(FE->getLastRef(), > + isLiteralInclude(*Pragma) > + ? Pragma->str() > + : ("\"" + *Pragma + "\"").str()); > return false; > } > > > diff --git a/clang-tools-extra/clangd/unittests/HeadersTests.cpp > b/clang-tools-extra/clangd/unittests/HeadersTests.cpp > index 32e4aea15490b..324d4b58a1ef1 100644 > --- a/clang-tools-extra/clangd/unittests/HeadersTests.cpp > +++ b/clang-tools-extra/clangd/unittests/HeadersTests.cpp > @@ -9,6 +9,7 @@ > #include "Headers.h" > > #include "Compiler.h" > +#include "Matchers.h" > #include "TestFS.h" > #include "TestTU.h" > #include "clang/Basic/TokenKinds.h" > @@ -30,6 +31,7 @@ namespace { > using ::testing::AllOf; > using ::testing::Contains; > using ::testing::ElementsAre; > +using ::testing::Eq; > using ::testing::IsEmpty; > using ::testing::Not; > using ::testing::UnorderedElementsAre; > @@ -445,6 +447,18 @@ TEST_F(HeadersTest, HasIWYUPragmas) { > EXPECT_FALSE(Includes.hasIWYUExport(getID("none.h", Includes))); > } > > +TEST(Headers, ParseIWYUPragma) { > + EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep"), HasValue(Eq("keep"))); > + EXPECT_THAT(parseIWYUPragma("// IWYU pragma: keep\netc"), > + HasValue(Eq("keep"))); > + EXPECT_EQ(parseIWYUPragma("/* IWYU pragma: keep"), llvm::None) > + << "Only // comments supported!"; > + EXPECT_EQ(parseIWYUPragma("// IWYU pragma: keep"), llvm::None) > + << "Sensitive to whitespace"; > + EXPECT_EQ(parseIWYUPragma("// IWYU pragma:keep"), llvm::None) > + << "Sensitive to whitespace"; > +} > + > } // namespace > } // namespace clangd > } // namespace clang > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits