ilya-biryukov created this revision.
ilya-biryukov added a reviewer: sammccall.
Herald added subscribers: ioeric, jkorous-apple, klimek.

Found by asan. Fiddling with code completion AST after
FrontendAction::Exceute can lead to errors.
Calling the callback in ProcessCodeCompleteResults to make sure we
don't access uninitialized state.

This particular issue comes from the fact that Sema::TUScope is
deleted when destructor of ~Parser runs, but still present in
Sema::TUScope and accessed when building completion items.

I'm still struggling to come up with a small repro. The relevant
stackframes reported by asan are:
ERROR: AddressSanitizer: heap-use-after-free on address
READ of size 8 at 0x61400020d090 thread T175

  #0 0x5632dff7821b in llvm::SmallPtrSetImplBase::isSmall() const 
include/llvm/ADT/SmallPtrSet.h:195:33
  #1 0x5632e0335901 in llvm::SmallPtrSetImplBase::insert_imp(void const*) 
include/llvm/ADT/SmallPtrSet.h:127:9
  #2 0x5632e067347d in 
llvm::SmallPtrSetImpl<clang::Decl*>::insert(clang::Decl*) 
include/llvm/ADT/SmallPtrSet.h:372:14
  #3 0x5632e065df80 in clang::Scope::AddDecl(clang::Decl*) 
tools/clang/include/clang/Sema/Scope.h:287:18
  #4 0x5632e0623eea in 
clang::ASTReader::pushExternalDeclIntoScope(clang::NamedDecl*, 
clang::DeclarationName) clang/lib/Serialization/ASTReader.cpp
  #5 0x5632e062ce74 in clang::ASTReader::finishPendingActions() 
tools/clang/lib/Serialization/ASTReader.cpp:9164:9
  ....
  #30 0x5632e02009c4 in clang::index::generateUSRForDecl(clang::Decl const*, 
llvm::SmallVectorImpl<char>&) tools/clang/lib/Index/USRGeneration.cpp:1037:6
  #31 0x5632dff73eab in clang::clangd::(anonymous 
namespace)::getSymbolID(clang::CodeCompletionResult const&) 
tools/clang/tools/extra/clangd/CodeComplete.cpp:326:20
  #32 0x5632dff6fe91 in 
clang::clangd::CodeCompleteFlow::mergeResults(std::vector<clang::CodeCompletionResult,
 std::allocator<clang::CodeCompletionResult> > const&, 
clang::clangd::SymbolSlab const&)::'lambda'(clang::CodeCompletionResult 
const&)::operator()(clang::CodeCompletionResult const&) 
tools/clang/tools/extra/clangd/CodeComplete.cpp:938:24
  #33 0x5632dff6e426 in 
clang::clangd::CodeCompleteFlow::mergeResults(std::vector<clang::CodeCompletionResult,
 std::allocator<clang::CodeCompletionResult> > const&, 
clang::clangd::SymbolSlab const&) 
third_party/llvm/llvm/tools/clang/tools/extra/clangd/CodeComplete.cpp:949:38
  #34 0x5632dff7a34d in clang::clangd::CodeCompleteFlow::runWithSema() 
llvm/tools/clang/tools/extra/clangd/CodeComplete.cpp:894:16
  #35 0x5632dff6df6a in 
clang::clangd::CodeCompleteFlow::run(clang::clangd::(anonymous 
namespace)::SemaCompleteInput const&) &&::'lambda'()::operator()() const 
third_party/llvm/llvm/tools/clang/tools/extra/clangd/CodeComplete.cpp:858:35
  #36 0x5632dff6cd42 in clang::clangd::(anonymous 
namespace)::semaCodeComplete(std::unique_ptr<clang::CodeCompleteConsumer, 
std::default_delete<clang::CodeCompleteConsumer> >, clang::CodeCompleteOptions 
const&, clang::clangd::(anonymous namespace)::SemaCompleteInput const&, 
llvm::function_ref<void ()>) 
tools/clang/tools/extra/clangd/CodeComplete.cpp:735:5

0x61400020d090 is located 80 bytes inside of 432-byte region 
[0x61400020d040,0x61400020d1f0)
freed by thread T175  here:

  #0 0x5632df74e115 in operator delete(void*, unsigned long) 
projects/compiler-rt/lib/asan/asan_new_delete.cc:161:3
  #1 0x5632e0b06973 in clang::Parser::~Parser() 
tools/clang/lib/Parse/Parser.cpp:410:3
  #2 0x5632e0b06ddd in clang::Parser::~Parser() 
clang/lib/Parse/Parser.cpp:408:19
  #3 0x5632e0b03286 in std::unique_ptr<clang::Parser, 
std::default_delete<clang::Parser> >::~unique_ptr() .../bits/unique_ptr.h:236:4
  #4 0x5632e0b021c4 in clang::ParseAST(clang::Sema&, bool, bool) 
tools/clang/lib/Parse/ParseAST.cpp:182:1
  #5 0x5632e0726544 in clang::FrontendAction::Execute() 
tools/clang/lib/Frontend/FrontendAction.cpp:904:8
  #6 0x5632dff6cd05 in clang::clangd::(anonymous 
namespace)::semaCodeComplete(std::unique_ptr<clang::CodeCompleteConsumer, 
std::default_delete<clang::CodeCompleteConsumer> >, clang::CodeCompleteOptions 
const&, clang::clangd::(anonymous namespace)::SemaCompleteInput const&, 
llvm::function_ref<void ()>) 
tools/clang/tools/extra/clangd/CodeComplete.cpp:728:15


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D44000

Files:
  clangd/CodeComplete.cpp

Index: clangd/CodeComplete.cpp
===================================================================
--- clangd/CodeComplete.cpp
+++ clangd/CodeComplete.cpp
@@ -425,30 +425,32 @@
   return Info.scopesForIndexQuery();
 }
 
+struct SemaCtx {
+  Sema &CCSema;
+  CodeCompletionContext &CCContext;
+  GlobalCodeCompletionAllocator &CCAllocator;
+  CodeCompletionTUInfo &CCTUInfo;
+};
+
 // The CompletionRecorder captures Sema code-complete output, including context.
 // It filters out ignored results (but doesn't apply fuzzy-filtering yet).
 // It doesn't do scoring or conversion to CompletionItem yet, as we want to
 // merge with index results first.
 struct CompletionRecorder : public CodeCompleteConsumer {
-  CompletionRecorder(const CodeCompleteOptions &Opts)
+  using ResultsFn =
+      UniqueFunction<void(SemaCtx, llvm::ArrayRef<CodeCompletionResult>)>;
+
+  CompletionRecorder(const CodeCompleteOptions &Opts, ResultsFn ResultsCallback)
       : CodeCompleteConsumer(Opts.getClangCompleteOpts(),
                              /*OutputIsBinary=*/false),
-        CCContext(CodeCompletionContext::CCC_Other), Opts(Opts),
+        Opts(Opts),
         CCAllocator(std::make_shared<GlobalCodeCompletionAllocator>()),
-        CCTUInfo(CCAllocator) {}
-  std::vector<CodeCompletionResult> Results;
-  CodeCompletionContext CCContext;
-  Sema *CCSema = nullptr; // Sema that created the results.
-  // FIXME: Sema is scary. Can we store ASTContext and Preprocessor, instead?
+        CCTUInfo(CCAllocator), ResultsCallback(std::move(ResultsCallback)) {}
 
   void ProcessCodeCompleteResults(class Sema &S, CodeCompletionContext Context,
                                   CodeCompletionResult *InResults,
                                   unsigned NumResults) override final {
-    // Record the completion context.
-    assert(!CCSema && "ProcessCodeCompleteResults called multiple times!");
-    CCSema = &S;
-    CCContext = Context;
-
+    std::vector<CodeCompletionResult> Results;
     // Retain the results we might want.
     for (unsigned I = 0; I < NumResults; ++I) {
       auto &Result = InResults[I];
@@ -466,45 +468,51 @@
         continue;
       Results.push_back(Result);
     }
+    if (ResultsCallback)
+      ResultsCallback(SemaCtx{S, Context, *CCAllocator, CCTUInfo}, Results);
   }
 
   CodeCompletionAllocator &getAllocator() override { return *CCAllocator; }
   CodeCompletionTUInfo &getCodeCompletionTUInfo() override { return CCTUInfo; }
 
-  // Returns the filtering/sorting name for Result, which must be from Results.
-  // Returned string is owned by this recorder (or the AST).
-  llvm::StringRef getName(const CodeCompletionResult &Result) {
-    switch (Result.Kind) {
-    case CodeCompletionResult::RK_Declaration:
-      if (auto *ID = Result.Declaration->getIdentifier())
-        return ID->getName();
-      break;
-    case CodeCompletionResult::RK_Keyword:
-      return Result.Keyword;
-    case CodeCompletionResult::RK_Macro:
-      return Result.Macro->getName();
-    case CodeCompletionResult::RK_Pattern:
-      return Result.Pattern->getTypedText();
-    }
-    auto *CCS = codeCompletionString(Result, /*IncludeBriefComments=*/false);
-    return CCS->getTypedText();
-  }
-
-  // Build a CodeCompletion string for R, which must be from Results.
-  // The CCS will be owned by this recorder.
-  CodeCompletionString *codeCompletionString(const CodeCompletionResult &R,
-                                             bool IncludeBriefComments) {
-    // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
-    return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
-        *CCSema, CCContext, *CCAllocator, CCTUInfo, IncludeBriefComments);
-  }
-
 private:
   CodeCompleteOptions Opts;
   std::shared_ptr<GlobalCodeCompletionAllocator> CCAllocator;
   CodeCompletionTUInfo CCTUInfo;
+  ResultsFn ResultsCallback;
 };
 
+// Build a CodeCompletion string for R, which must be from Results.
+// The CCS will be owned by this recorder.
+CodeCompletionString *codeCompletionString(SemaCtx &Ctx,
+                                           const CodeCompletionResult &R,
+                                           bool IncludeBriefComments) {
+  // CodeCompletionResult doesn't seem to be const-correct. We own it, anyway.
+  return const_cast<CodeCompletionResult &>(R).CreateCodeCompletionString(
+      Ctx.CCSema, Ctx.CCContext, Ctx.CCAllocator, Ctx.CCTUInfo,
+      IncludeBriefComments);
+}
+
+// Returns the filtering/sorting name for Result, which must be from Results.
+// Returned string is owned by this recorder (or the AST).
+llvm::StringRef getName(SemaCtx &Ctx, const CodeCompletionResult &Result) {
+  switch (Result.Kind) {
+  case CodeCompletionResult::RK_Declaration:
+    if (auto *ID = Result.Declaration->getIdentifier())
+      return ID->getName();
+    break;
+  case CodeCompletionResult::RK_Keyword:
+    return Result.Keyword;
+  case CodeCompletionResult::RK_Macro:
+    return Result.Macro->getName();
+  case CodeCompletionResult::RK_Pattern:
+    return Result.Pattern->getTypedText();
+  }
+  auto *CCS = codeCompletionString(Ctx, Result,
+                                   /*IncludeBriefComments=*/false);
+  return CCS->getTypedText();
+}
+
 // Tracks a bounded number of candidates with the best scores.
 class TopN {
 public:
@@ -660,8 +668,7 @@
 // Callback will be invoked once completion is done, but before cleaning up.
 bool semaCodeComplete(std::unique_ptr<CodeCompleteConsumer> Consumer,
                       const clang::CodeCompleteOptions &Options,
-                      const SemaCompleteInput &Input,
-                      llvm::function_ref<void()> Callback = nullptr) {
+                      const SemaCompleteInput &Input) {
   auto Tracer = llvm::make_unique<trace::Span>("Sema completion");
   std::vector<const char *> ArgStrs;
   for (const auto &S : Input.Command.CommandLine)
@@ -680,7 +687,7 @@
                                           &DummyDiagsConsumer, false),
       Input.VFS);
   if (!CI) {
-    log("Couldn't create CompilerInvocation");;
+    log("Couldn't create CompilerInvocation");
     return false;
   }
   CI->getFrontendOpts().DisableFree = false;
@@ -729,11 +736,6 @@
     log("Execute() failed when running codeComplete for " + Input.FileName);
     return false;
   }
-  Tracer.reset();
-
-  if (Callback)
-    Callback();
-
   Tracer = llvm::make_unique<trace::Span>("Sema completion cleanup");
   Action.EndSourceFile();
 
@@ -833,35 +835,35 @@
 class CodeCompleteFlow {
   PathRef FileName;
   const CodeCompleteOptions &Opts;
-  // Sema takes ownership of Recorder. Recorder is valid until Sema cleanup.
-  std::unique_ptr<CompletionRecorder> RecorderOwner;
-  CompletionRecorder &Recorder;
   int NSema = 0, NIndex = 0, NBoth = 0; // Counters for logging.
   bool Incomplete = false; // Would more be available with a higher limit?
   llvm::Optional<FuzzyMatcher> Filter; // Initialized once Sema runs.
 
 public:
   // A CodeCompleteFlow object is only useful for calling run() exactly once.
   CodeCompleteFlow(PathRef FileName, const CodeCompleteOptions &Opts)
-      : FileName(FileName), Opts(Opts),
-        RecorderOwner(new CompletionRecorder(Opts)), Recorder(*RecorderOwner) {}
+      : FileName(FileName), Opts(Opts) {}
 
   CompletionList run(const SemaCompleteInput &SemaCCInput) && {
     trace::Span Tracer("CodeCompleteFlow");
     // We run Sema code completion first. It builds an AST and calculates:
     //   - completion results based on the AST. These are saved for merging.
     //   - partial identifier and context. We need these for the index query.
     CompletionList Output;
-    semaCodeComplete(std::move(RecorderOwner), Opts.getClangCompleteOpts(),
-                     SemaCCInput, [&] {
-                       if (Recorder.CCSema) {
-                         Output = runWithSema();
-                         SPAN_ATTACH(
-                             Tracer, "sema_completion_kind",
-                             getCompletionKindString(Recorder.CCContext.getKind()));
-                       } else
-                         log("Code complete: no Sema callback, 0 results");
-                     });
+    bool SemaCalled = false;
+    auto Recorder = llvm::make_unique<CompletionRecorder>(
+        Opts, [&](SemaCtx Ctx, llvm::ArrayRef<CodeCompletionResult> Results) {
+          assert(!SemaCalled && "Sema callback called twice");
+          SemaCalled = true;
+
+          Output = runWithSema(Ctx, Results);
+          SPAN_ATTACH(Tracer, "sema_completion_kind",
+                      getCompletionKindString(Ctx.CCContext.getKind()));
+        });
+    semaCodeComplete(std::move(Recorder), Opts.getClangCompleteOpts(),
+                     SemaCCInput);
+    if (!SemaCalled)
+      log("Code complete: no Sema callback, 0 results");
 
     SPAN_ATTACH(Tracer, "sema_results", NSema);
     SPAN_ATTACH(Tracer, "index_results", NIndex);
@@ -881,27 +883,28 @@
 private:
   // This is called by run() once Sema code completion is done, but before the
   // Sema data structures are torn down. It does all the real work.
-  CompletionList runWithSema() {
-    Filter = FuzzyMatcher(
-        Recorder.CCSema->getPreprocessor().getCodeCompletionFilter());
+  CompletionList runWithSema(SemaCtx Ctx,
+                             llvm::ArrayRef<CodeCompletionResult> Results) {
+    Filter =
+        FuzzyMatcher(Ctx.CCSema.getPreprocessor().getCodeCompletionFilter());
     // Sema provides the needed context to query the index.
     // FIXME: in addition to querying for extra/overlapping symbols, we should
     //        explicitly request symbols corresponding to Sema results.
     //        We can use their signals even if the index can't suggest them.
     // We must copy index results to preserve them, but there are at most Limit.
-    auto IndexResults = queryIndex();
+    auto IndexResults = queryIndex(Ctx);
     // Merge Sema and Index results, score them, and pick the winners.
-    auto Top = mergeResults(Recorder.Results, IndexResults);
+    auto Top = mergeResults(Ctx, Results, IndexResults);
     // Convert the results to the desired LSP structs.
     CompletionList Output;
     for (auto &C : Top)
-      Output.items.push_back(toCompletionItem(C.first, C.second));
+      Output.items.push_back(toCompletionItem(Ctx, C.first, C.second));
     Output.isIncomplete = Incomplete;
     return Output;
   }
 
-  SymbolSlab queryIndex() {
-    if (!Opts.Index || !allowIndex(Recorder.CCContext.getKind()))
+  SymbolSlab queryIndex(SemaCtx &Ctx) {
+    if (!Opts.Index || !allowIndex(Ctx.CCContext.getKind()))
       return SymbolSlab();
     trace::Span Tracer("Query index");
     SPAN_ATTACH(Tracer, "limit", Opts.Limit);
@@ -912,8 +915,7 @@
     if (Opts.Limit)
       Req.MaxCandidateCount = Opts.Limit;
     Req.Query = Filter->pattern();
-    Req.Scopes =
-        getQueryScopes(Recorder.CCContext, Recorder.CCSema->getSourceManager());
+    Req.Scopes = getQueryScopes(Ctx.CCContext, Ctx.CCSema.getSourceManager());
     log(llvm::formatv("Code complete: fuzzyFind(\"{0}\", scopes=[{1}])",
                       Req.Query,
                       llvm::join(Req.Scopes.begin(), Req.Scopes.end(), ",")));
@@ -927,7 +929,7 @@
   // Merges the Sema and Index results where possible, scores them, and
   // returns the top results from best to worst.
   std::vector<std::pair<CompletionCandidate, CompletionItemScores>>
-  mergeResults(const std::vector<CodeCompletionResult> &SemaResults,
+  mergeResults(SemaCtx &Ctx, llvm::ArrayRef<CodeCompletionResult> SemaResults,
                const SymbolSlab &IndexResults) {
     trace::Span Tracer("Merge and score results");
     // We only keep the best N results at any time, in "native" format.
@@ -945,24 +947,25 @@
       return nullptr;
     };
     // Emit all Sema results, merging them with Index results if possible.
-    for (auto &SemaResult : Recorder.Results)
-      addCandidate(Top, &SemaResult, CorrespondingIndexResult(SemaResult));
+    for (auto &SemaResult : SemaResults)
+      addCandidate(Ctx, Top, &SemaResult, CorrespondingIndexResult(SemaResult));
     // Now emit any Index-only results.
     for (const auto &IndexResult : IndexResults) {
       if (UsedIndexResults.count(&IndexResult))
         continue;
-      addCandidate(Top, /*SemaResult=*/nullptr, &IndexResult);
+      addCandidate(Ctx, Top, /*SemaResult=*/nullptr, &IndexResult);
     }
     return std::move(Top).items();
   }
 
   // Scores a candidate and adds it to the TopN structure.
-  void addCandidate(TopN &Candidates, const CodeCompletionResult *SemaResult,
+  void addCandidate(SemaCtx &Ctx, TopN &Candidates,
+                    const CodeCompletionResult *SemaResult,
                     const Symbol *IndexResult) {
     CompletionCandidate C;
     C.SemaResult = SemaResult;
     C.IndexResult = IndexResult;
-    C.Name = IndexResult ? IndexResult->Name : Recorder.getName(*SemaResult);
+    C.Name = IndexResult ? IndexResult->Name : getName(Ctx, *SemaResult);
 
     CompletionItemScores Scores;
     if (auto FuzzyScore = Filter->match(C.Name))
@@ -982,11 +985,12 @@
       Incomplete = true;
   }
 
-  CompletionItem toCompletionItem(const CompletionCandidate &Candidate,
+  CompletionItem toCompletionItem(SemaCtx &Ctx,
+                                  const CompletionCandidate &Candidate,
                                   const CompletionItemScores &Scores) {
     CodeCompletionString *SemaCCS = nullptr;
     if (auto *SR = Candidate.SemaResult)
-      SemaCCS = Recorder.codeCompletionString(*SR, Opts.IncludeBriefComments);
+      SemaCCS = codeCompletionString(Ctx, *SR, Opts.IncludeBriefComments);
     return Candidate.build(FileName, Scores, Opts, SemaCCS);
   }
 };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to