steakhal updated this revision to Diff 314857.
steakhal marked 11 inline comments as done.
steakhal added a comment.

- move `MacroExpansionRangeRecorder` to `clang::detail` and mark it as a friend 
class
- fix comment typo in `getExpandedMacroForLocation`
- rename `getExpandedMacroForLocation` -> `getExpandedText`
- rename `getSubstitutedTextForLocation` -> `getOriginalText`
- introduce `llvm_unreachable` where applicable.

No tests were changed.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D93222/new/

https://reviews.llvm.org/D93222

Files:
  clang/include/clang/Analysis/MacroExpansionContext.h
  clang/lib/Analysis/CMakeLists.txt
  clang/lib/Analysis/MacroExpansionContext.cpp
  clang/unittests/Analysis/CMakeLists.txt
  clang/unittests/Analysis/MacroExpansionContextTest.cpp

Index: clang/unittests/Analysis/MacroExpansionContextTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Analysis/MacroExpansionContextTest.cpp
@@ -0,0 +1,338 @@
+//===- unittests/Analysis/MacroExpansionContextTest.cpp - -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/MacroExpansionContext.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/TargetOptions.h"
+#include "clang/Lex/HeaderSearch.h"
+#include "clang/Lex/HeaderSearchOptions.h"
+#include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Parse/Parser.h"
+#include "llvm/ADT/SmallString.h"
+#include "gtest/gtest.h"
+
+// static bool HACK_EnableDebugInUnitTest = (::llvm::DebugFlag = true);
+
+namespace clang {
+namespace analysis {
+namespace {
+
+class MacroExpansionContextTest : public ::testing::Test {
+protected:
+  MacroExpansionContextTest()
+      : InMemoryFileSystem(new llvm::vfs::InMemoryFileSystem),
+        FileMgr(FileSystemOptions(), InMemoryFileSystem),
+        DiagID(new DiagnosticIDs()), DiagOpts(new DiagnosticOptions()),
+        Diags(DiagID, DiagOpts.get(), new IgnoringDiagConsumer()),
+        SourceMgr(Diags, FileMgr), TargetOpts(new TargetOptions()) {
+    TargetOpts->Triple = "x86_64-pc-linux-unknown";
+    Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
+    LangOpts.CPlusPlus20 = 1; // For __VA_OPT__
+  }
+
+  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> InMemoryFileSystem;
+  FileManager FileMgr;
+  IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
+  IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts;
+  DiagnosticsEngine Diags;
+  SourceManager SourceMgr;
+  LangOptions LangOpts;
+  std::shared_ptr<TargetOptions> TargetOpts;
+  IntrusiveRefCntPtr<TargetInfo> Target;
+
+  std::unique_ptr<MacroExpansionContext>
+  getMacroExpansionContextFor(StringRef SourceText) {
+    std::unique_ptr<llvm::MemoryBuffer> Buf =
+        llvm::MemoryBuffer::getMemBuffer(SourceText);
+    SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
+    TrivialModuleLoader ModLoader;
+    HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
+                            Diags, LangOpts, Target.get());
+    Preprocessor PP(std::make_shared<PreprocessorOptions>(), Diags, LangOpts,
+                    SourceMgr, HeaderInfo, ModLoader,
+                    /*IILookup =*/nullptr,
+                    /*OwnsHeaderSearch =*/false);
+
+    PP.Initialize(*Target);
+    auto Ctx = std::make_unique<MacroExpansionContext>(PP, LangOpts);
+
+    // Lex source text.
+    PP.EnterMainSourceFile();
+
+    while (true) {
+      Token Tok;
+      PP.Lex(Tok);
+      if (Tok.is(tok::eof))
+        break;
+    }
+
+    // Callbacks have been executed at this point.
+    return Ctx;
+  }
+
+  /// Returns the expansion location to main file at the given row and column.
+  SourceLocation at(unsigned row, unsigned col) const {
+    SourceLocation Loc =
+        SourceMgr.translateLineCol(SourceMgr.getMainFileID(), row, col);
+    return SourceMgr.getExpansionLoc(Loc);
+  }
+};
+
+TEST_F(MacroExpansionContextTest, EmptyExpansions) {
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define EMPTY
+  A b cd EMPTY ef EMPTY gh
+EMPTY zz
+      )code");
+  // After preprocessing:
+  //  A b cd ef gh
+  //      zz
+
+  EXPECT_EQ("", Ctx->getExpandedText(at(3, 10)));
+  EXPECT_EQ("EMPTY", Ctx->getOriginalText(at(3, 10)));
+
+  EXPECT_EQ("", Ctx->getExpandedText(at(3, 19)));
+  EXPECT_EQ("EMPTY", Ctx->getOriginalText(at(3, 19)));
+
+  EXPECT_EQ("", Ctx->getExpandedText(at(4, 1)));
+  EXPECT_EQ("EMPTY", Ctx->getOriginalText(at(4, 1)));
+}
+
+TEST_F(MacroExpansionContextTest, TransitiveExpansions) {
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define EMPTY
+  #define WOOF EMPTY ) EMPTY   1
+  A b cd WOOF ef EMPTY gh
+      )code");
+  // After preprocessing:
+  //  A b cd ) 1 ef gh
+
+  EXPECT_EQ(")1", Ctx->getExpandedText(at(4, 10)));
+  EXPECT_EQ("WOOF", Ctx->getOriginalText(at(4, 10)));
+
+  EXPECT_EQ("", Ctx->getExpandedText(at(4, 18)));
+  EXPECT_EQ("EMPTY", Ctx->getOriginalText(at(4, 18)));
+}
+
+TEST_F(MacroExpansionContextTest, MacroFunctions) {
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define EMPTY
+  #define WOOF(x) x(EMPTY ) )  ) EMPTY   1
+  A b cd WOOF($$ ef) EMPTY gh
+  WOOF(WOOF)
+  WOOF(WOOF(bar barr))),,),')
+      )code");
+  // After preprocessing:
+  //  A b cd $$ ef( ) ) ) 1 gh
+  //  WOOF( ) ) ) 1
+  //  bar barr( ) ) ) 1( ) ) ) 1),,),')
+
+  EXPECT_EQ("$$ ef ()))1", Ctx->getExpandedText(at(4, 10)));
+  EXPECT_EQ("WOOF($$ ef)", Ctx->getOriginalText(at(4, 10)));
+
+  EXPECT_EQ("", Ctx->getExpandedText(at(4, 22)));
+  EXPECT_EQ("EMPTY", Ctx->getOriginalText(at(4, 22)));
+
+  EXPECT_EQ("WOOF ()))1", Ctx->getExpandedText(at(5, 3)));
+  EXPECT_EQ("WOOF(WOOF)", Ctx->getOriginalText(at(5, 3)));
+
+  EXPECT_EQ("bar barr ()))1()))1", Ctx->getExpandedText(at(6, 3)));
+  EXPECT_EQ("WOOF(WOOF(bar barr))", Ctx->getOriginalText(at(6, 3)));
+}
+
+TEST_F(MacroExpansionContextTest, VariadicMacros) {
+  // From the GCC website.
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define eprintf(format, ...) fprintf (stderr, format, __VA_ARGS__)
+  eprintf("success!\n", );
+  eprintf("success!\n");
+
+  #define eprintf2(format, ...) \
+    fprintf (stderr, format __VA_OPT__(,) __VA_ARGS__)
+  eprintf2("success!\n", );
+  eprintf2("success!\n");
+      )code");
+  // After preprocessing:
+  //  fprintf (stderr, "success!\n", );
+  //  fprintf (stderr, "success!\n", );
+  //  fprintf (stderr, "success!\n" );
+  //  fprintf (stderr, "success!\n" );
+
+  EXPECT_EQ(R"(fprintf (stderr ,"success!\n",))",
+            Ctx->getExpandedText(at(3, 3)));
+  EXPECT_EQ(R"(eprintf("success!\n", ))", Ctx->getOriginalText(at(3, 3)));
+
+  EXPECT_EQ(R"(fprintf (stderr ,"success!\n",))",
+            Ctx->getExpandedText(at(4, 3)));
+  EXPECT_EQ(R"(eprintf("success!\n"))", Ctx->getOriginalText(at(4, 3)));
+
+  EXPECT_EQ(R"(fprintf (stderr ,"success!\n"))",
+            Ctx->getExpandedText(at(8, 3)));
+  EXPECT_EQ(R"(eprintf2("success!\n", ))", Ctx->getOriginalText(at(8, 3)));
+
+  EXPECT_EQ(R"(fprintf (stderr ,"success!\n"))",
+            Ctx->getExpandedText(at(9, 3)));
+  EXPECT_EQ(R"(eprintf2("success!\n"))", Ctx->getOriginalText(at(9, 3)));
+}
+
+TEST_F(MacroExpansionContextTest, ConcatenationMacros) {
+  // From the GCC website.
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define COMMAND(NAME)  { #NAME, NAME ## _command }
+  struct command commands[] = {
+    COMMAND(quit),
+    COMMAND(help),
+  };)code");
+  // After preprocessing:
+  //  struct command commands[] = {
+  //    { "quit", quit_command },
+  //    { "help", help_command },
+  //  };
+
+  EXPECT_EQ(R"({"quit",quit_command })", Ctx->getExpandedText(at(4, 5)));
+  EXPECT_EQ("COMMAND(quit)", Ctx->getOriginalText(at(4, 5)));
+
+  EXPECT_EQ(R"({"help",help_command })", Ctx->getExpandedText(at(5, 5)));
+  EXPECT_EQ("COMMAND(help)", Ctx->getOriginalText(at(5, 5)));
+}
+
+TEST_F(MacroExpansionContextTest, StringizingMacros) {
+  // From the GCC website.
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define WARN_IF(EXP) \
+  do { if (EXP) \
+          fprintf (stderr, "Warning: " #EXP "\n"); } \
+  while (0)
+  WARN_IF (x == 0);
+
+  #define xstr(s) str(s)
+  #define str(s) #s
+  #define foo 4
+  str (foo)
+  xstr (foo)
+      )code");
+  // After preprocessing:
+  //  do { if (x == 0) fprintf (stderr, "Warning: " "x == 0" "\n"); } while (0);
+  //  "foo"
+  //  "4"
+
+  EXPECT_EQ(
+      R"(do {if (x ==0)fprintf (stderr ,"Warning: ""x == 0""\n");}while (0))",
+      Ctx->getExpandedText(at(6, 3)));
+  EXPECT_EQ("WARN_IF (x == 0)", Ctx->getOriginalText(at(6, 3)));
+
+  EXPECT_EQ(R"("foo")", Ctx->getExpandedText(at(11, 3)));
+  EXPECT_EQ("str (foo)", Ctx->getOriginalText(at(11, 3)));
+
+  EXPECT_EQ(R"("4")", Ctx->getExpandedText(at(12, 3)));
+  EXPECT_EQ("xstr (foo)", Ctx->getOriginalText(at(12, 3)));
+}
+
+TEST_F(MacroExpansionContextTest, StringizingVariadicMacros) {
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define xstr(...) str(__VA_ARGS__)
+  #define str(...) #__VA_ARGS__
+  #define RParen2x ) )
+  #define EMPTY
+  #define f(x, ...) __VA_ARGS__ ! x * x
+  #define g(...) zz EMPTY f(__VA_ARGS__ ! x) f() * y
+  #define h(x, G) G(x) G(x ## x RParen2x
+  #define q(G) h(apple, G(apple)) RParen2x
+
+  q(g)
+  q(xstr)
+  g(RParen2x)
+  f( RParen2x )s
+      )code");
+  // clang-format off
+  // After preprocessing:
+  //  zz ! apple ! x * apple ! x ! * * y(apple) zz ! apple ! x * apple ! x ! * * y(appleapple ) ) ) )
+  //  "apple"(apple) "apple"(appleapple ) ) ) )
+  //  zz ! * ) ! x) ! * * y
+  //  ! ) ) * ) )
+  // clang-format on
+
+  EXPECT_EQ("zz !apple !x *apple !x !**y (apple )zz !apple !x *apple !x !**y "
+            "(appleapple ))))",
+            Ctx->getExpandedText(at(11, 3)));
+  EXPECT_EQ("q(g)", Ctx->getOriginalText(at(11, 3)));
+
+  EXPECT_EQ(R"res("apple"(apple )"apple"(appleapple )))))res",
+            Ctx->getExpandedText(at(12, 3)));
+  EXPECT_EQ("q(xstr)", Ctx->getOriginalText(at(12, 3)));
+
+  EXPECT_EQ("zz !*)!x )!**y ", Ctx->getExpandedText(at(13, 3)));
+  EXPECT_EQ("g(RParen2x)", Ctx->getOriginalText(at(13, 3)));
+
+  EXPECT_EQ("!))*))", Ctx->getExpandedText(at(14, 3)));
+  EXPECT_EQ("f( RParen2x )", Ctx->getOriginalText(at(14, 3)));
+}
+
+TEST_F(MacroExpansionContextTest, RedefUndef) {
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define Hi(x) Welcome x
+  Hi(Adam)
+  #define Hi Willkommen
+  Hi Hans
+  #undef Hi
+  Hi(Hi)
+      )code");
+  // After preprocessing:
+  //  Welcome Adam
+  //  Willkommen Hans
+  //  Hi(Hi)
+
+  // FIXME: Extra space follows every identifier.
+  EXPECT_EQ("Welcome Adam ", Ctx->getExpandedText(at(3, 3)));
+  EXPECT_EQ("Hi(Adam)", Ctx->getOriginalText(at(3, 3)));
+
+  EXPECT_EQ("Willkommen ", Ctx->getExpandedText(at(5, 3)));
+  EXPECT_EQ("Hi", Ctx->getOriginalText(at(5, 3)));
+
+  // There was no macro expansion at 7:3, empty returned in that case.
+  EXPECT_EQ("", Ctx->getExpandedText(at(7, 3)));
+  EXPECT_EQ("", Ctx->getOriginalText(at(7, 3)));
+}
+
+TEST_F(MacroExpansionContextTest, UnbalacedParenthesis) {
+  const auto Ctx = getMacroExpansionContextFor(R"code(
+  #define retArg(x) x
+  #define retArgUnclosed retArg(fun()
+  #define BB CC
+  #define applyInt BB(int)
+  #define CC(x) retArgUnclosed
+
+  applyInt );
+
+  #define expandArgUnclosedCommaExpr(x) (x, fun(), 1
+  #define f expandArgUnclosedCommaExpr
+
+  int x =  f(f(1))  ));
+      )code");
+  // After preprocessing:
+  //  fun();
+  //  int x = ((1, fun(), 1, fun(), 1 ));
+
+  EXPECT_EQ("fun ()", Ctx->getExpandedText(at(8, 3)));
+  EXPECT_EQ("applyInt )", Ctx->getOriginalText(at(8, 3)));
+
+  EXPECT_EQ("((1,fun (),1,fun (),1", Ctx->getExpandedText(at(13, 12)));
+  EXPECT_EQ("f(f(1))", Ctx->getOriginalText(at(13, 12)));
+}
+
+} // namespace
+} // namespace analysis
+} // namespace clang
Index: clang/unittests/Analysis/CMakeLists.txt
===================================================================
--- clang/unittests/Analysis/CMakeLists.txt
+++ clang/unittests/Analysis/CMakeLists.txt
@@ -8,6 +8,7 @@
   CFGTest.cpp
   CloneDetectionTest.cpp
   ExprMutationAnalyzerTest.cpp
+  MacroExpansionContextTest.cpp
   )
 
 clang_target_link_libraries(ClangAnalysisTests
@@ -17,6 +18,13 @@
   clangASTMatchers
   clangBasic
   clangFrontend
+  clangLex
   clangSerialization
+  clangTesting
   clangTooling
   )
+
+target_link_libraries(ClangAnalysisTests
+  PRIVATE
+  LLVMTestingSupport
+  )
Index: clang/lib/Analysis/MacroExpansionContext.cpp
===================================================================
--- /dev/null
+++ clang/lib/Analysis/MacroExpansionContext.cpp
@@ -0,0 +1,209 @@
+//===- MacroExpansionContext.cpp - Macro expansion information --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/MacroExpansionContext.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "macro-expansion-context"
+
+static void dumpTokenInto(const clang::Preprocessor &PP, clang::raw_ostream &OS,
+                          clang::Token Tok);
+
+namespace clang {
+namespace detail {
+class MacroExpansionRangeRecorder : public PPCallbacks {
+  const Preprocessor &PP;
+  SourceManager &SM;
+  MacroExpansionContext::ExpansionRangeMap &ExpansionRanges;
+
+public:
+  explicit MacroExpansionRangeRecorder(
+      const Preprocessor &PP, SourceManager &SM,
+      MacroExpansionContext::ExpansionRangeMap &ExpansionRanges)
+      : PP(PP), SM(SM), ExpansionRanges(ExpansionRanges) {}
+
+  void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
+                    SourceRange Range, const MacroArgs *Args) override {
+    SourceLocation MacroNameBegin = SM.getExpansionLoc(MacroName.getLocation());
+    assert(MacroNameBegin == SM.getExpansionLoc(Range.getBegin()));
+
+    const SourceLocation ExpansionEnd = [Range, &SM = SM, &MacroName] {
+      // If the range is empty, use the length of the macro.
+      if (Range.getBegin() == Range.getEnd())
+        return SM.getExpansionLoc(
+            MacroName.getLocation().getLocWithOffset(MacroName.getLength()));
+
+      // Include the last character.
+      return SM.getExpansionLoc(Range.getEnd()).getLocWithOffset(1);
+    }();
+
+    LLVM_DEBUG(llvm::dbgs() << "MacroExpands event: '";
+               dumpTokenInto(PP, llvm::dbgs(), MacroName);
+               llvm::dbgs()
+               << "' with length " << MacroName.getLength() << " at ";
+               MacroNameBegin.print(llvm::dbgs(), SM);
+               llvm::dbgs() << ", expansion end at ";
+               ExpansionEnd.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
+
+    // If the expansion range is empty, use the identifier of the macro as a
+    // range.
+    MacroExpansionContext::ExpansionRangeMap::iterator It;
+    bool Inserted;
+    std::tie(It, Inserted) =
+        ExpansionRanges.try_emplace(MacroNameBegin, ExpansionEnd);
+    if (Inserted) {
+      LLVM_DEBUG(llvm::dbgs() << "maps ";
+                 It->getFirst().print(llvm::dbgs(), SM); llvm::dbgs() << " to ";
+                 It->getSecond().print(llvm::dbgs(), SM);
+                 llvm::dbgs() << '\n';);
+    } else {
+      if (SM.isBeforeInTranslationUnit(It->getSecond(), ExpansionEnd)) {
+        It->getSecond() = ExpansionEnd;
+        LLVM_DEBUG(
+            llvm::dbgs() << "remaps "; It->getFirst().print(llvm::dbgs(), SM);
+            llvm::dbgs() << " to "; It->getSecond().print(llvm::dbgs(), SM);
+            llvm::dbgs() << '\n';);
+      }
+    }
+  }
+};
+} // namespace detail
+} // namespace clang
+
+using namespace clang;
+using MacroExpansionText = MacroExpansionContext::MacroExpansionText;
+
+MacroExpansionContext::MacroExpansionContext(Preprocessor &PP,
+                                             const LangOptions &LangOpts)
+    : PP(PP), SM(PP.getSourceManager()), LangOpts(LangOpts) {
+
+  // Make sure that the Preprocessor does not outlive the MacroExpansionContext.
+  PP.addPPCallbacks(std::make_unique<detail::MacroExpansionRangeRecorder>(
+      PP, PP.getSourceManager(), ExpansionRanges));
+  // Same applies here.
+  PP.setTokenWatcher([this](const Token &Tok) { onTokenLexed(Tok); });
+}
+
+MacroExpansionText
+MacroExpansionContext::getExpandedText(SourceLocation MacroExpansionLoc) const {
+  assert(MacroExpansionLoc.isFileID() &&
+         "It has a spelling location, use the expansion location instead.");
+
+  const auto it = ExpandedTokens.find_as(MacroExpansionLoc);
+  // Nothing was recorded for this location: return empty, as documented.
+  if (it == ExpandedTokens.end())
+    return MacroExpansionText();
+  return it->getSecond();
+}
+
+StringRef
+MacroExpansionContext::getOriginalText(SourceLocation MacroExpansionLoc) const {
+  assert(MacroExpansionLoc.isFileID() &&
+         "It has a spelling location, use the expansion location instead.");
+
+  const auto it = ExpansionRanges.find_as(MacroExpansionLoc);
+  // No macro was expanded at this location: return empty, as documented.
+  if (it == ExpansionRanges.end())
+    return StringRef();
+
+  return Lexer::getSourceText(
+      CharSourceRange::getCharRange(it->getFirst(), it->getSecond()), SM,
+      LangOpts);
+}
+
+void MacroExpansionContext::dumpExpansionRanges() const {
+  dumpExpansionRangesToStream(llvm::dbgs());
+}
+void MacroExpansionContext::dumpExpandedTexts() const {
+  dumpExpandedTextsToStream(llvm::dbgs());
+}
+
+void MacroExpansionContext::dumpExpansionRangesToStream(raw_ostream &OS) const {
+  std::vector<std::pair<SourceLocation, SourceLocation>> LocalExpansionRanges;
+  LocalExpansionRanges.reserve(ExpansionRanges.size());
+  for (const auto &Record : ExpansionRanges)
+    LocalExpansionRanges.emplace_back(
+        std::make_pair(Record.getFirst(), Record.getSecond()));
+  llvm::sort(LocalExpansionRanges);
+
+  OS << "\n=============== ExpansionRanges ===============\n";
+  for (const auto &Record : LocalExpansionRanges) {
+    OS << "> ";
+    Record.first.print(OS, SM);
+    OS << ", ";
+    Record.second.print(OS, SM);
+    OS << '\n';
+  }
+}
+
+void MacroExpansionContext::dumpExpandedTextsToStream(raw_ostream &OS) const {
+  std::vector<std::pair<SourceLocation, MacroExpansionText>>
+      LocalExpandedTokens;
+  LocalExpandedTokens.reserve(ExpandedTokens.size());
+  for (const auto &Record : ExpandedTokens)
+    LocalExpandedTokens.emplace_back(
+        std::make_pair(Record.getFirst(), Record.getSecond()));
+  llvm::sort(LocalExpandedTokens);
+
+  OS << "\n=============== ExpandedTokens ===============\n";
+  for (const auto &Record : LocalExpandedTokens) {
+    OS << "> ";
+    Record.first.print(OS, SM);
+    OS << " -> '" << Record.second << "'\n";
+  }
+}
+
+static void dumpTokenInto(const Preprocessor &PP, raw_ostream &OS, Token Tok) {
+  if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    // FIXME: For now, we don't respect whitespaces between macro expanded
+    // tokens. We just emit a space after every identifier to produce a valid
+    // code for `int a ;` like expansions.
+    //              ^-^-- Space after the 'int' and 'a' identifiers.
+    OS << II->getName() << ' ';
+  } else if (Tok.isLiteral() && !Tok.needsCleaning() && Tok.getLiteralData()) {
+    OS << StringRef(Tok.getLiteralData(), Tok.getLength());
+  } else {
+    char Tmp[256];
+    if (Tok.getLength() < sizeof(Tmp)) {
+      const char *TokPtr = Tmp;
+      // FIXME: Might use a different overload for cleaner callsite.
+      unsigned Len = PP.getSpelling(Tok, TokPtr);
+      OS.write(TokPtr, Len);
+    } else {
+      OS << "<too long token>";
+    }
+  }
+}
+
+void MacroExpansionContext::onTokenLexed(const Token &Tok) {
+  SourceLocation SLoc = Tok.getLocation();
+  if (SLoc.isFileID())
+    return;
+
+  LLVM_DEBUG(llvm::dbgs() << "lexed macro expansion token '";
+             dumpTokenInto(PP, llvm::dbgs(), Tok); llvm::dbgs() << "' at ";
+             SLoc.print(llvm::dbgs(), SM); llvm::dbgs() << '\n';);
+
+  // Remove spelling location.
+  SourceLocation CurrExpansionLoc = SM.getExpansionLoc(SLoc);
+
+  MacroExpansionText TokenAsString;
+  llvm::raw_svector_ostream OS(TokenAsString);
+
+  // FIXME: Prepend newlines and space to produce the exact same output as the
+  // preprocessor would for this token.
+
+  dumpTokenInto(PP, OS, Tok);
+
+  ExpansionMap::iterator It;
+  bool Inserted;
+  std::tie(It, Inserted) =
+      ExpandedTokens.try_emplace(CurrExpansionLoc, std::move(TokenAsString));
+  if (!Inserted)
+    It->getSecond().append(TokenAsString);
+}
\ No newline at end of file
Index: clang/lib/Analysis/CMakeLists.txt
===================================================================
--- clang/lib/Analysis/CMakeLists.txt
+++ clang/lib/Analysis/CMakeLists.txt
@@ -19,6 +19,7 @@
   ExprMutationAnalyzer.cpp
   IssueHash.cpp
   LiveVariables.cpp
+  MacroExpansionContext.cpp
   ObjCNoReturn.cpp
   PathDiagnostic.cpp
   PostOrderCFGView.cpp
Index: clang/include/clang/Analysis/MacroExpansionContext.h
===================================================================
--- /dev/null
+++ clang/include/clang/Analysis/MacroExpansionContext.h
@@ -0,0 +1,123 @@
+//===- MacroExpansionContext.h - Macro expansion information ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_ANALYSIS_MACROEXPANSIONCONTEXT_H
+#define LLVM_CLANG_ANALYSIS_MACROEXPANSIONCONTEXT_H
+
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace clang {
+
+namespace detail {
+class MacroExpansionRangeRecorder;
+} // namespace detail
+
+/// MacroExpansionContext tracks the macro expansions processed by the
+/// Preprocessor. It means that it can track source locations from a single
+/// translation unit. For every macro expansion it can tell you what text will
+/// be substituted.
+///
+/// It was designed to deal with:
+///  - regular macros
+///  - macro functions
+///  - variadic macros
+///  - transitive macro expansions
+///  - macro redefinition
+///  - unbalanced parentheses
+///
+/// \code{.c}
+///   void bar();
+///   #define retArg(x) x
+///   #define retArgUnclosed retArg(bar()
+///   #define BB CC
+///   #define applyInt BB(int)
+///   #define CC(x) retArgUnclosed
+///
+///   void unbalancedMacros() {
+///     applyInt  );
+///   //^~~~~~~~~~^ is the substituted range
+///   // Substituted text is "applyInt  )"
+///   // Expanded text is "bar()"
+///   }
+///
+///   #define expandArgUnclosedCommaExpr(x) (x, bar(), 1
+///   #define f expandArgUnclosedCommaExpr
+///
+///   void unbalancedMacros2() {
+///     int x =  f(f(1))  ));  // Look at the parenthesis!
+///   //         ^~~~~~^ is the substituted range
+///   // Substituted text is "f(f(1))"
+///   // Expanded text is "((1,bar(),1,bar(),1"
+///   }
+/// \endcode
+/// \remark Currently we don't respect the whitespaces between expanded tokens,
+///         so the output for this example might differ from the -E compiler
+///         invocation.
+/// \remark All whitespaces are consumed while constructing the expansion.
+///         After every identifier a single space is inserted to produce valid
+///         C code even when an identifier follows another identifier, such as
+///         in variable declarations.
+/// \remark MacroExpansionContext object must outlive the Preprocessor
+///         parameter.
+class MacroExpansionContext {
+public:
+  /// Register the necessary callbacks to the Preprocessor to record the
+  /// expansion events and the generated tokens. Must ensure that this object
+  /// outlives the given Preprocessor.
+  MacroExpansionContext(Preprocessor &PP, const LangOptions &LangOpts);
+  using MacroExpansionText = SmallString<40>;
+
+  /// \param MacroExpansionLoc Must be the expansion location of a macro.
+  /// \return The textual representation of the token sequence which was
+  ///         substituted in place of the macro.
+  ///         If no macro was expanded at that location, returns an empty
+  ///         string.
+  MacroExpansionText getExpandedText(SourceLocation MacroExpansionLoc) const;
+
+  /// \param MacroExpansionLoc Must be the expansion location of a macro.
+  /// \return The text from the original source code which was substituted by
+  ///         the macro expansion chain from the given location.
+  ///         If no macro was expanded at that location, returns an empty
+  ///         string.
+  StringRef getOriginalText(SourceLocation MacroExpansionLoc) const;
+
+  LLVM_DUMP_METHOD void dumpExpansionRangesToStream(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dumpExpandedTextsToStream(raw_ostream &OS) const;
+  LLVM_DUMP_METHOD void dumpExpansionRanges() const;
+  LLVM_DUMP_METHOD void dumpExpandedTexts() const;
+
+private:
+  friend class detail::MacroExpansionRangeRecorder;
+  using ExpansionMap = llvm::DenseMap<SourceLocation, MacroExpansionText>;
+  using ExpansionRangeMap = llvm::DenseMap<SourceLocation, SourceLocation>;
+
+  /// Associates the textual representation of the expanded tokens at the given
+  /// macro expansion location.
+  ExpansionMap ExpandedTokens;
+
+  /// Tracks which source location was the last affected by any macro
+  /// substitution starting from a given macro expansion location.
+  ExpansionRangeMap ExpansionRanges;
+
+  const Preprocessor &PP;
+  const SourceManager &SM;
+  const LangOptions &LangOpts;
+
+  /// This callback is called by the preprocessor.
+  /// It stores the textual representation of the expanded token sequence for a
+  /// macro expansion location.
+  void onTokenLexed(const Token &Tok);
+};
+} // end namespace clang
+
+#endif // LLVM_CLANG_ANALYSIS_MACROEXPANSIONCONTEXT_H
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to