akyrtzi created this revision.
Herald added a subscriber: mgorny.
Herald added a project: All.
akyrtzi requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Directive `dependency_directives_scan::tokens_present_before_eof` is introduced 
to indicate there were tokens present before
the last scanned dependency directive and EOF.
This is useful to ensure we correctly identify the macro guards when lexing 
using the dependency directives.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D133357

Files:
  clang/include/clang/Lex/DependencyDirectivesScanner.h
  clang/lib/Lex/DependencyDirectivesScanner.cpp
  clang/lib/Lex/Lexer.cpp
  clang/unittests/Lex/CMakeLists.txt
  clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
  clang/unittests/Lex/PPDependencyDirectivesTest.cpp

Index: clang/unittests/Lex/PPDependencyDirectivesTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Lex/PPDependencyDirectivesTest.cpp
@@ -0,0 +1,150 @@
+//===- unittests/Lex/PPDependencyDirectivesTest.cpp -------------------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/ModuleLoader.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "llvm/Testing/Support/Error.h"
+#include "gtest/gtest.h"
+
+using namespace clang;
+
+namespace {
+
+// The test fixture.
+class PPDependencyDirectivesTest : public ::testing::Test {
+protected:
+  PPDependencyDirectivesTest()
+      : FileMgr(FileMgrOpts), DiagID(new DiagnosticIDs()),
+        Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions) {
+    TargetOpts->Triple = "x86_64-apple-macos12";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+  }
+
+  FileSystemOptions FileMgrOpts;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+};
+
+class IncludeCollector : public PPCallbacks {
+public:
+  Preprocessor &PP;
+  SmallVectorImpl<StringRef> &IncludedFiles;
+
+  IncludeCollector(Preprocessor &PP, SmallVectorImpl<StringRef> &IncludedFiles)
+      : PP(PP), IncludedFiles(IncludedFiles) {}
+
+  void LexedFileChanged(FileID FID, LexedFileChangeReason Reason,
+                        SrcMgr::CharacteristicKind FileType, FileID PrevFID,
+                        SourceLocation Loc) override {
+    if (Reason != LexedFileChangeReason::EnterFile)
+      return;
+    if (FID == PP.getPredefinesFileID())
+      return;
+    StringRef Filename =
+        PP.getSourceManager().getSLocEntry(FID).getFile().getName();
+    IncludedFiles.push_back(Filename);
+  }
+};
+
+TEST_F(PPDependencyDirectivesTest, MacroGuard) {
+  // "head1.h" has a macro guard and should only be included once.
+  // "head2.h" and "head3.h" have tokens following the macro check, they should
+  // be included multiple times.
+
+  auto VFS = new llvm::vfs::InMemoryFileSystem();
+  VFS->addFile(
+      "head1.h", 0,
+      llvm::MemoryBuffer::getMemBuffer("#ifndef H1_H\n#define H1_H\n#endif\n"));
+  VFS->addFile(
+      "head2.h", 0,
+      llvm::MemoryBuffer::getMemBuffer("#ifndef H2_H\n#define H2_H\n#endif\n\n"
+                                       "extern int foo;\n"));
+  VFS->addFile("head3.h", 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   "#ifndef H3_H\n#define H3_H\n#endif\n\n"
+                   "#ifdef SOMEMAC\nextern int foo;\n#endif\n"));
+  VFS->addFile("main.c", 0,
+               llvm::MemoryBuffer::getMemBuffer(
+                   "#include \"head1.h\"\n#include \"head1.h\"\n"
+                   "#include \"head2.h\"\n#include \"head2.h\"\n"
+                   "#include \"head3.h\"\n#include \"head3.h\"\n"));
+  FileMgr.setVirtualFileSystem(VFS);
+
+  Optional<FileEntryRef> FE;
+  ASSERT_THAT_ERROR(FileMgr.getFileRef("main.c").moveInto(FE),
+                    llvm::Succeeded());
+  SourceMgr.setMainFileID(
+      SourceMgr.createFileID(*FE, SourceLocation(), SrcMgr::C_User));
+
+  struct DepDirectives {
+    SmallVector<dependency_directives_scan::Token> Tokens;
+    SmallVector<dependency_directives_scan::Directive> Directives;
+  };
+  SmallVector<std::unique_ptr<DepDirectives>> DepDirectivesObjects;
+
+  auto getDependencyDirectives = [&](FileEntryRef File)
+      -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+    DepDirectivesObjects.push_back(std::make_unique<DepDirectives>());
+    StringRef Input = (*FileMgr.getBufferForFile(File))->getBuffer();
+    SmallVector<dependency_directives_scan::Token> Tokens;
+    SmallVector<dependency_directives_scan::Directive> Directives;
+    bool Err = scanSourceForDependencyDirectives(
+        Input, DepDirectivesObjects.back()->Tokens,
+        DepDirectivesObjects.back()->Directives);
+    EXPECT_FALSE(Err);
+    return llvm::makeArrayRef(DepDirectivesObjects.back()->Directives);
+  };
+
+  auto PPOpts = std::make_shared<PreprocessorOptions>();
+  PPOpts->DependencyDirectivesForFile = [&](FileEntryRef File)
+      -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+    return getDependencyDirectives(File);
+  };
+
+  TrivialModuleLoader ModLoader;
+  HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                          Diags, LangOpts, Target.get());
+  Preprocessor PP(PPOpts, Diags, LangOpts, SourceMgr, HeaderInfo, ModLoader,
+                  /*IILookup =*/nullptr,
+                  /*OwnsHeaderSearch =*/false);
+  PP.Initialize(*Target);
+
+  SmallVector<StringRef> IncludedFiles;
+  PP.addPPCallbacks(std::make_unique<IncludeCollector>(PP, IncludedFiles));
+  PP.EnterMainSourceFile();
+  while (true) {
+    Token tok;
+    PP.Lex(tok);
+    if (tok.is(tok::eof))
+      break;
+  }
+
+  SmallVector<StringRef> ExpectedIncludes{
+      "main.c", "./head1.h", "./head2.h", "./head2.h", "./head3.h", "./head3.h",
+  };
+  EXPECT_EQ(IncludedFiles, ExpectedIncludes);
+}
+
+} // anonymous namespace
Index: clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
===================================================================
--- clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
+++ clang/unittests/Lex/DependencyDirectivesScannerTest.cpp
@@ -25,7 +25,8 @@
     return true;
 
   raw_svector_ostream OS(Out);
-  printDependencyDirectivesAsSource(Input, Directives, OS);
+  printDependencyDirectivesAsSource(Input, Directives, OS,
+                                    /*PrintMarkerForTokensBeforeEOF*/ true);
   if (!Out.empty() && Out.back() != '\n')
     Out.push_back('\n');
   Out.push_back('\0');
@@ -57,10 +58,11 @@
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("abc def\nxyz", Out, Tokens,
                                                     Directives));
-  EXPECT_TRUE(Out.empty());
+  EXPECT_STREQ("<TokBEOF>\n", Out.data());
   EXPECT_TRUE(Tokens.empty());
-  ASSERT_EQ(1u, Directives.size());
-  ASSERT_EQ(pp_eof, Directives.back().Kind);
+  ASSERT_EQ(2u, Directives.size());
+  EXPECT_EQ(tokens_present_before_eof, Directives[0].Kind);
+  EXPECT_EQ(pp_eof, Directives[1].Kind);
 }
 
 TEST(MinimizeSourceToDependencyDirectivesTest, AllTokens) {
@@ -451,7 +453,7 @@
   SmallVector<char, 128> Out;
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma A\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma push_macro(\"MACRO\")\n", Out));
@@ -470,15 +472,15 @@
   EXPECT_STREQ("#pragma include_alias(<A>, <B>)\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives("#pragma clang\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBEOF>\n", Out.data());
 
   ASSERT_FALSE(
       minimizeSourceToDependencyDirectives("#pragma clang module\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma clang module impor\n", Out));
-  EXPECT_STREQ("", Out.data());
+  EXPECT_STREQ("<TokBEOF>\n", Out.data());
 
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(
       "#pragma clang module import\n", Out));
@@ -663,7 +665,7 @@
            "#error \\\n#include <t.h>\n",
        }) {
     ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-    EXPECT_STREQ("", Out.data());
+    EXPECT_STREQ("<TokBEOF>\n", Out.data());
   }
 
   for (auto Source : {
@@ -767,7 +769,8 @@
 )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
   EXPECT_STREQ(
-      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n",
+      "#if NEVER_ENABLED\n#define why(fmt,...) #error don't try me\n#endif\n"
+      "<TokBEOF>\n",
       Out.data());
 
   Source = R"(#if NEVER_ENABLED
@@ -778,7 +781,8 @@
   )";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
   EXPECT_STREQ(
-      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n",
+      "#if NEVER_ENABLED\n#define why(fmt,...) \"quote dropped\n#endif\n"
+      "<TokBEOF>\n",
       Out.data());
 }
 
@@ -799,11 +803,11 @@
 
   StringRef Source = "#define X '\\ \t\nx'\nvoid foo() {}";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X '\\ \t\nx'\n", Out.data());
+  EXPECT_STREQ("#define X '\\ \t\nx'\n<TokBEOF>\n", Out.data());
 
   Source = "#define X \"\\ \r\nx\"\nvoid foo() {}";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
-  EXPECT_STREQ("#define X \"\\ \r\nx\"\n", Out.data());
+  EXPECT_STREQ("#define X \"\\ \r\nx\"\n<TokBEOF>\n", Out.data());
 
   Source = "#define X \"\\ \r\nx\n#include <x>\n";
   ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
@@ -848,11 +852,56 @@
                "exp\\\nort import:l[[rename]];"
                "import<<=3;import a b d e d e f e;"
                "import foo[[no_unique_address]];import foo();"
-               "import f(:sefse);import f(->a=3);\n",
+               "import f(:sefse);import f(->a=3);"
+               "<TokBEOF>\n",
                Out.data());
-  ASSERT_EQ(Directives.size(), 10u);
+  ASSERT_EQ(Directives.size(), 11u);
   EXPECT_EQ(Directives[0].Kind, pp_include);
   EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl);
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, TokensBeforeEOF) {
+  SmallString<128> Out;
+
+  StringRef Source = R"(
+    #define A
+    #ifdef B
+    int x;
+    #endif
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#define A\n<TokBEOF>\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif // some comment
+
+    // other comment
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif /* some comment
+
+    */
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n", Out.data());
+
+  Source = R"(
+    #ifndef A
+    #define A
+    #endif /* some comment
+
+    */
+    int x;
+    )";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#ifndef A\n#define A\n#endif\n<TokBEOF>\n", Out.data());
+}
+
 } // end anonymous namespace
Index: clang/unittests/Lex/CMakeLists.txt
===================================================================
--- clang/unittests/Lex/CMakeLists.txt
+++ clang/unittests/Lex/CMakeLists.txt
@@ -9,6 +9,7 @@
   LexerTest.cpp
   PPCallbacksTest.cpp
   PPConditionalDirectiveRecordTest.cpp
+  PPDependencyDirectivesTest.cpp
   PPMemoryAllocationsTest.cpp
   )
 
@@ -19,4 +20,6 @@
   clangLex
   clangParse
   clangSema
+
+  LLVMTestingSupport
   )
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -4323,6 +4323,8 @@
   while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
     if (DepDirectives.front().Kind == pp_eof)
       return LexEndOfFile(Result, BufferEnd);
+    if (DepDirectives.front().Kind == tokens_present_before_eof)
+      MIOpt.ReadToken();
     NextDepDirectiveTokenIndex = 0;
     DepDirectives = DepDirectives.drop_front();
   }
@@ -4398,6 +4400,7 @@
     case cxx_import_decl:
     case cxx_export_module_decl:
     case cxx_export_import_decl:
+    case tokens_present_before_eof:
       break;
     case pp_if:
     case pp_ifdef:
Index: clang/lib/Lex/DependencyDirectivesScanner.cpp
===================================================================
--- clang/lib/Lex/DependencyDirectivesScanner.cpp
+++ clang/lib/Lex/DependencyDirectivesScanner.cpp
@@ -87,6 +87,9 @@
   dependency_directives_scan::Token &lexIncludeFilename(const char *&First,
                                                         const char *const End);
 
+  void skipLine(const char *&First, const char *const End);
+  void skipDirective(StringRef Name, const char *&First, const char *const End);
+
   /// Lexes next token and if it is identifier returns its string, otherwise
   /// it skips the current line and returns \p None.
   ///
@@ -150,6 +153,7 @@
   DiagnosticsEngine *Diags;
   SourceLocation InputSourceLoc;
 
+  const char *LastTokenPtr = nullptr;
   /// Keeps track of the tokens for the currently lexed directive. Once a
   /// directive is fully lexed and "committed" then the tokens get appended to
   /// \p Tokens and \p CurDirToks is cleared for the next directive.
@@ -364,7 +368,7 @@
   return (Cur + 1) < End && isAsciiIdentifierContinue(*(Cur + 1));
 }
 
-static void skipLine(const char *&First, const char *const End) {
+void Scanner::skipLine(const char *&First, const char *const End) {
   for (;;) {
     assert(First <= End);
     if (First == End)
@@ -379,6 +383,7 @@
       // Iterate over strings correctly to avoid comments and newlines.
       if (*First == '"' ||
           (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
+        LastTokenPtr = First;
         if (isRawStringLiteral(Start, First))
           skipRawString(First, End);
         else
@@ -388,6 +393,7 @@
 
       // Iterate over comments correctly.
       if (*First != '/' || End - First < 2) {
+        LastTokenPtr = First;
         ++First;
         continue;
       }
@@ -399,6 +405,7 @@
       }
 
       if (First[1] != '*') {
+        LastTokenPtr = First;
         ++First;
         continue;
       }
@@ -416,8 +423,8 @@
   }
 }
 
-static void skipDirective(StringRef Name, const char *&First,
-                          const char *const End) {
+void Scanner::skipDirective(StringRef Name, const char *&First,
+                            const char *const End) {
   if (llvm::StringSwitch<bool>(Name)
           .Case("warning", true)
           .Case("error", true)
@@ -710,6 +717,8 @@
     return false;
   }
 
+  LastTokenPtr = First;
+
   TheLexer.seek(getOffsetAt(First), /*IsAtStartOfLine*/ true);
 
   auto ScEx1 = make_scope_exit([&]() {
@@ -803,6 +812,9 @@
 
   if (!Error) {
     // Add an EOF on success.
+    if (LastTokenPtr &&
+        (Tokens.empty() || LastTokenPtr > Input.begin() + Tokens.back().Offset))
+      pushDirective(tokens_present_before_eof);
     pushDirective(pp_eof);
   }
 
@@ -828,7 +840,7 @@
 void clang::printDependencyDirectivesAsSource(
     StringRef Source,
     ArrayRef<dependency_directives_scan::Directive> Directives,
-    llvm::raw_ostream &OS) {
+    llvm::raw_ostream &OS, bool PrintMarkerForTokensBeforeEOF) {
   // Add a space separator where it is convenient for testing purposes.
   auto needsSpaceSeparator =
       [](tok::TokenKind Prev,
@@ -851,6 +863,9 @@
   };
 
   for (const dependency_directives_scan::Directive &Directive : Directives) {
+    if (Directive.Kind == tokens_present_before_eof &&
+        PrintMarkerForTokensBeforeEOF)
+      OS << "<TokBEOF>";
     Optional<tok::TokenKind> PrevTokenKind;
     for (const dependency_directives_scan::Token &Tok : Directive.Tokens) {
       if (PrevTokenKind && needsSpaceSeparator(*PrevTokenKind, Tok))
Index: clang/include/clang/Lex/DependencyDirectivesScanner.h
===================================================================
--- clang/include/clang/Lex/DependencyDirectivesScanner.h
+++ clang/include/clang/Lex/DependencyDirectivesScanner.h
@@ -82,6 +82,9 @@
   cxx_import_decl,
   cxx_export_module_decl,
   cxx_export_import_decl,
+  /// Indicates that there are tokens present between the last scanned directive
+  /// and eof. The \p Directive::Tokens array will be empty for this kind.
+  tokens_present_before_eof,
   pp_eof,
 };
 
@@ -123,13 +126,16 @@
 /// \param Directives The previously scanned dependency
 /// directives.
 /// \param OS the stream to print the dependency directives on.
+/// \param PrintMarkerForTokensBeforeEOF if true also prints
+/// \p dependency_directives_scan::tokens_present_before_eof, otherwise this
+/// directive will be ignored.
 ///
 /// This is used primarily for testing purposes, during dependency scanning the
 /// \p Lexer uses the tokens directly, not their printed version.
 void printDependencyDirectivesAsSource(
     StringRef Source,
     ArrayRef<dependency_directives_scan::Directive> Directives,
-    llvm::raw_ostream &OS);
+    llvm::raw_ostream &OS, bool PrintMarkerForTokensBeforeEOF = false);
 
 } // end namespace clang
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to