teemperor updated this revision to Diff 64982.
teemperor marked 3 inline comments as done.
teemperor added a comment.

- Added `FIXME:` and removed duplicate "for example".


https://reviews.llvm.org/D22514

Files:
  lib/Analysis/CloneDetection.cpp
  test/Analysis/copypaste/test-min-max.cpp

Index: test/Analysis/copypaste/test-min-max.cpp
===================================================================
--- test/Analysis/copypaste/test-min-max.cpp
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -18,14 +18,6 @@
 
 // False positives below. These clones should not be reported.
 
-// FIXME: Detect different binary operator kinds.
-int min1(int a, int b) { // expected-note{{Related code clone is here.}}
-  log();
-  if (a < b)
-    return a;
-  return b;
-}
-
 // FIXME: Detect different variable patterns.
 int min2(int a, int b) { // expected-note{{Related code clone is here.}}
   log();
@@ -36,6 +28,13 @@
 
 // Functions below are not clones and should not be reported.
 
+int min1(int a, int b) { // no-warning
+  log();
+  if (a < b)
+    return a;
+  return b;
+}
+
 int foo(int a, int b) { // no-warning
   return a + b;
 }
Index: lib/Analysis/CloneDetection.cpp
===================================================================
--- lib/Analysis/CloneDetection.cpp
+++ lib/Analysis/CloneDetection.cpp
@@ -78,6 +78,199 @@
 SourceLocation StmtSequence::getEndLoc() const { return back()->getLocEnd(); }
 
 namespace {
+/// \brief Defines empty addData methods for each Stmt class.
+///
+/// StmtDataCollector inherits from this class so that each Stmt has an empty
+/// fallback addData method. This prevents compilation failures from missing
+/// methods if a new Stmt subclass is added to clang. Also, many subclasses
+/// don't have any special data attached to them, so we don't need to manually
+/// define empty addData methods in StmtDataCollector if they are already
+/// defined here.
+class StmtDataCollectorDefiner {
+protected:
+  #define STMT(CLASS, PARENT)                                                  \
+  void addData##CLASS(CLASS const *S) {                                        \
+  }
+  #include "clang/AST/StmtNodes.inc"
+};
+} // end anonymous namespace
+
+namespace {
+/// \brief Collects the data of a single Stmt.
+///
+/// The data of a statement that isn't collected by this class is considered to
+/// be unimportant when comparing two statements. This means that this class
+/// defines what a 'similar' clone is. For example, this class doesn't collect
+/// names of variables, which makes statements that only differ in the names of
+/// the referenced variables clones of each other.
+class StmtDataCollector : StmtDataCollectorDefiner {
+  ASTContext &Context;
+  llvm::FoldingSetNodeID &D;
+
+public:
+
+  /// \brief Collects data from the given Stmt and saves it into the given
+  ///        FoldingSetNodeID.
+  StmtDataCollector(Stmt const *S, ASTContext &Context,
+                    llvm::FoldingSetNodeID &D) : Context(Context), D(D) {
+    collectData(S);
+  }
+
+protected:
+  // Below are utility methods for adding generic data to the FoldingSetNodeID.
+
+  void addData(unsigned Data) {
+    D.AddInteger(Data);
+  }
+
+  void addData(llvm::StringRef const &String) {
+    D.AddString(String);
+  }
+
+  void addData(std::string const &String) {
+    D.AddString(String);
+  }
+
+  void addData(QualType QT) {
+    addData(QT.getAsString());
+  }
+
+  // Utility functions for calling the addData method for each parent class
+  // of a given Stmt class.
+  #define STMT(CLASS, PARENT)                                                  \
+  void callAddData##CLASS(CLASS const *S) {                                    \
+    callAddData##PARENT(S);                                                    \
+    addData##CLASS(S);                                                         \
+  }
+  #include "clang/AST/StmtNodes.inc"
+
+  // Above code won't define callAddDataStmt, so we have to manually define it.
+  void callAddDataStmt(Stmt const *S) {
+    addDataStmt(S);
+  }
+
+  /// \brief Calls the appropriate callAddData method for the actual class of
+  ///        the given Stmt.
+  void collectData(Stmt const *S) {
+    switch(S->getStmtClass()) {
+      case Stmt::NoStmtClass:
+        break;
+    #define ABSTRACT_STMT(STMT)
+    #define STMT(CLASS, PARENT)                                                \
+      case Stmt::CLASS##Class:                                                 \
+        callAddData##CLASS(cast<CLASS const>(S)); break;
+    #include "clang/AST/StmtNodes.inc"
+    }
+  }
+
+  // Below are functions that collect the specific data for each Stmt subclass.
+  #define DEF_ADD_DATA(CLASS, CODE)                                            \
+  void addData##CLASS(CLASS const *S) {                                        \
+    CODE;                                                                      \
+  }
+
+  DEF_ADD_DATA(Stmt, { addData(S->getStmtClass()); })
+  DEF_ADD_DATA(Expr, { addData(S->getType()); })
+
+  //--- Builtin functionality ------------------------------------------------//
+  DEF_ADD_DATA(ArrayTypeTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(AtomicExpr, {
+    addData(S->isVolatile());
+    addData(S->getOp());
+  })
+  DEF_ADD_DATA(ExpressionTraitExpr, { addData(S->getTrait()); })
+  DEF_ADD_DATA(PredefinedExpr, { addData(S->getIdentType()); })
+  DEF_ADD_DATA(TypeTraitExpr, { addData(S->getTrait()); })
+
+  //--- Calls ----------------------------------------------------------------//
+  DEF_ADD_DATA(CXXOperatorCallExpr, { addData(S->getOperator()); })
+  DEF_ADD_DATA(CallExpr, {
+    addData(S->getDirectCallee()->getQualifiedNameAsString());
+  })
+
+  //--- Exceptions -----------------------------------------------------------//
+  DEF_ADD_DATA(CXXCatchStmt, { addData(S->getCaughtType()); })
+
+  //--- C++ OOP Stmts --------------------------------------------------------//
+  DEF_ADD_DATA(CXXDeleteExpr, {
+    addData(S->isArrayFormAsWritten());
+    addData(S->isGlobalDelete());
+  })
+
+  //--- Casts ----------------------------------------------------------------//
+  DEF_ADD_DATA(ObjCBridgedCastExpr, { addData(S->getBridgeKind()); })
+
+  //--- Miscellaneous Exprs --------------------------------------------------//
+  DEF_ADD_DATA(BinaryOperator, { addData(S->getOpcode()); })
+  DEF_ADD_DATA(UnaryOperator, { addData(S->getOpcode()); })
+
+  //--- Control flow ---------------------------------------------------------//
+  DEF_ADD_DATA(GotoStmt, { addData(S->getLabel()->getName()); })
+  DEF_ADD_DATA(IndirectGotoStmt, {
+                 if (S->getConstantTarget())
+                   addData(S->getConstantTarget()->getName());
+               })
+  DEF_ADD_DATA(LabelStmt, { addData(S->getDecl()->getName()); })
+  DEF_ADD_DATA(MSDependentExistsStmt, { addData(S->isIfExists()); })
+  DEF_ADD_DATA(AddrLabelExpr, { addData(S->getLabel()->getName()); })
+
+  //--- Objective-C ----------------------------------------------------------//
+  DEF_ADD_DATA(ObjCIndirectCopyRestoreExpr, { addData(S->shouldCopy()); })
+  DEF_ADD_DATA(ObjCPropertyRefExpr, {
+    addData(S->isSuperReceiver());
+    addData(S->isImplicitProperty());
+  })
+  DEF_ADD_DATA(ObjCAtCatchStmt, { addData(S->hasEllipsis()); })
+
+  //--- Miscellaneous Stmts --------------------------------------------------//
+  DEF_ADD_DATA(CXXFoldExpr, {
+    addData(S->isRightFold());
+    addData(S->getOperator());
+  })
+  DEF_ADD_DATA(GenericSelectionExpr, { addData(S->getNumAssocs()); })
+  DEF_ADD_DATA(LambdaExpr, {
+    for (const LambdaCapture &C : S->captures()) {
+      addData(C.isPackExpansion());
+      addData(C.getCaptureKind());
+      if (C.capturesVariable())
+        addData(C.getCapturedVar()->getType());
+    }
+    addData(S->isGenericLambda());
+    addData(S->isMutable());
+    addData(S->getCallOperator()->param_size());
+  })
+  DEF_ADD_DATA(DeclStmt, {
+    auto numDecls = std::distance(S->decl_begin(), S->decl_end());
+    addData(static_cast<unsigned>(numDecls));
+    for (Decl *D : S->decls()) {
+      if (VarDecl* VD = dyn_cast<VarDecl>(D)) {
+        addData(VD->getType());
+      }
+    }
+  })
+  DEF_ADD_DATA(AsmStmt, {
+    addData(S->isSimple());
+    addData(S->isVolatile());
+    addData(S->generateAsmString(Context));
+    for (unsigned i = 0; i < S->getNumInputs(); ++i) {
+      addData(S->getInputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumOutputs(); ++i) {
+      addData(S->getOutputConstraint(i));
+    }
+    for (unsigned i = 0; i < S->getNumClobbers(); ++i) {
+      addData(S->getClobber(i));
+    }
+  })
+  DEF_ADD_DATA(AttributedStmt, {
+    for (Attr const * A : S->getAttrs()) {
+      addData(std::string(A->getSpelling()));
+    }
+  })
+};
+} // end anonymous namespace
+
+namespace {
 /// \brief A cache for temporarily storing a small amount of CloneSignatures.
 ///
 /// This cache should be manually cleaned from CloneSignatures that are
@@ -110,6 +303,8 @@
   /// \param Context The ASTContext that contains Parent.
   void removeChildren(Stmt const *Parent, ASTContext &Context) {
     for (Stmt const *Child : Parent->children()) {
+      if (!Child)
+        continue;
       StmtSequence ChildSequence(Child, Context);
       remove(ChildSequence);
     }
@@ -126,7 +321,13 @@
         return *I;
       }
     }
-    llvm_unreachable("Couldn't find CloneSignature for StmtSequence");
+    // FIXME: We return an empty signature on a cache miss. This isn't a perfect
+    // solution, but it's better than crashing when RecursiveASTVisitor is
+    // skipping some generated statements when generating signatures (which
+    // leads to this situation where signatures are missing in the cache).
+    // Unless RecursiveASTVisitor starts skipping important nodes, this won't
+    // influence the false-positive rate.
+    return CloneDetector::CloneSignature();
   }
 };
 } // end anonymous namespace
@@ -249,10 +450,7 @@
   llvm::FoldingSetNodeID Hash;
   unsigned Complexity = 1;
 
-  // The only relevant data for now is the class of the statement, so we
-  // calculate the hash class into the current hash value.
-  // TODO: Handle statement class specific data.
-  Hash.AddInteger(S->getStmtClass());
+  StmtDataCollector(S, Context, Hash);
 
   // Incorporate the hash values of all child Stmts into the current hash value.
   HandleChildHashes(Hash, Complexity, S->children());
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to