teemperor updated this revision to Diff 108837.
teemperor added a comment.
Herald added a subscriber: klimek.

- Updated according to Artem's comments.


https://reviews.llvm.org/D34182

Files:
  include/clang/Analysis/CloneDetection.h
  lib/Analysis/CloneDetection.cpp
  lib/Format/UnwrappedLineParser.cpp
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp

Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/CloneChecker.cpp
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -81,11 +81,11 @@
   // because reportSuspiciousClones() wants to search them for errors.
   std::vector<CloneDetector::CloneGroup> AllCloneGroups;
 
-  Detector.findClones(AllCloneGroups,
-                      FilenamePatternConstraint(IgnoredFilesPattern),
-                      RecursiveCloneTypeIIConstraint(),
-                      MinComplexityConstraint(MinComplexity),
-                      MinGroupSizeConstraint(2), OnlyLargestCloneConstraint());
+  Detector.findClones(
+      AllCloneGroups, FilenamePatternConstraint(IgnoredFilesPattern),
+      RecursiveCloneTypeIIHashConstraint(), MinGroupSizeConstraint(2),
+      MinComplexityConstraint(MinComplexity),
+      RecursiveCloneTypeIIVerifyConstraint(), OnlyLargestCloneConstraint());
 
   if (ReportSuspiciousClones)
     reportSuspiciousClones(BR, Mgr, AllCloneGroups);
Index: lib/Format/UnwrappedLineParser.cpp
===================================================================
--- lib/Format/UnwrappedLineParser.cpp
+++ lib/Format/UnwrappedLineParser.cpp
@@ -494,8 +494,8 @@
 
   if (MunchSemi && FormatTok->Tok.is(tok::semi))
     nextToken();
-  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
   Line->Level = InitialLevel;
+  Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
   if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
     // Update the opening line to add the forward reference as well
     (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
Index: lib/Analysis/CloneDetection.cpp
===================================================================
--- lib/Analysis/CloneDetection.cpp
+++ lib/Analysis/CloneDetection.cpp
@@ -216,9 +216,18 @@
   return HashCode;
 }
 
-size_t RecursiveCloneTypeIIConstraint::saveHash(
-    const Stmt *S, const Decl *D,
-    std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
+/// Generates and saves a hash code for the given Stmt.
+/// \param S The given Stmt.
+/// \param D The Decl containing S.
+/// \param StmtsByHash Output parameter that will contain the hash codes for
+///                    each StmtSequence in the given Stmt.
+/// \return The hash code of the given Stmt.
+///
+/// If the given Stmt is a CompoundStmt, this method will also generate
+/// hashes for all possible StmtSequences in the children of this Stmt.
+static size_t
+saveHash(const Stmt *S, const Decl *D,
+         std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) {
   llvm::MD5 Hash;
   ASTContext &Context = D->getASTContext();
 
@@ -320,6 +329,14 @@
 
 void RecursiveCloneTypeIIConstraint::constrain(
     std::vector<CloneDetector::CloneGroup> &Sequences) {
+  RecursiveCloneTypeIIHashConstraint Hash;
+  Hash.constrain(Sequences);
+  RecursiveCloneTypeIIVerifyConstraint Verify;
+  Verify.constrain(Sequences);
+}
+
+void RecursiveCloneTypeIIHashConstraint::constrain(
+    std::vector<CloneDetector::CloneGroup> &Sequences) {
   // FIXME: Maybe we can do this in-place and don't need this additional vector.
   std::vector<CloneDetector::CloneGroup> Result;
 
@@ -359,8 +376,7 @@
 
       for (; i < StmtsByHash.size(); ++i) {
         // A different hash value means we have reached the end of the sequence.
-        if (PrototypeHash != StmtsByHash[i].first ||
-            !areSequencesClones(StmtsByHash[i].second, Current.second)) {
+        if (PrototypeHash != StmtsByHash[i].first) {
           // The current sequence could be the start of a new CloneGroup. So we
           // decrement i so that we visit it again in the outer loop.
           // Note: i can never be 0 at this point because we are just comparing
@@ -383,6 +399,14 @@
   Sequences = Result;
 }
 
+void RecursiveCloneTypeIIVerifyConstraint::constrain(
+    std::vector<CloneDetector::CloneGroup> &Sequences) {
+  CloneConstraint::splitCloneGroups(
+      Sequences, [](const StmtSequence &A, const StmtSequence &B) {
+        return areSequencesClones(A, B);
+      });
+}
+
 size_t MinComplexityConstraint::calculateStmtComplexity(
     const StmtSequence &Seq, const std::string &ParentMacroStack) {
   if (Seq.empty())
Index: include/clang/Analysis/CloneDetection.h
===================================================================
--- include/clang/Analysis/CloneDetection.h
+++ include/clang/Analysis/CloneDetection.h
@@ -442,22 +442,25 @@
       std::function<bool(const StmtSequence &, const StmtSequence &)> Compare);
 };
 
-/// Searches all children of the given clones for type II clones (i.e. they are
-/// identical in every aspect beside the used variable names).
-class RecursiveCloneTypeIIConstraint {
-
-  /// Generates and saves a hash code for the given Stmt.
-  /// \param S The given Stmt.
-  /// \param D The Decl containing S.
-  /// \param StmtsByHash Output parameter that will contain the hash codes for
-  ///                    each StmtSequence in the given Stmt.
-  /// \return The hash code of the given Stmt.
-  ///
-  /// If the given Stmt is a CompoundStmt, this method will also generate
-  /// hashes for all possible StmtSequences in the children of this Stmt.
-  size_t saveHash(const Stmt *S, const Decl *D,
-                  std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash);
+/// This constraint moves clones into clone groups of type II via hashing.
+///
+/// Clones with different hash values are moved into separate clone groups.
+/// Collisions are possible, and this constraint does nothing to address this
+/// them. Add the slower RecursiveCloneTypeIIVerifyConstraint later in the
+/// constraint chain, not necessarily immediately, to eliminate hash collisions
+/// through a more detailed analysis.
+class RecursiveCloneTypeIIHashConstraint {
+public:
+  void constrain(std::vector<CloneDetector::CloneGroup> &Sequences);
+};
 
+/// This constraint moves clones into clone groups of type II by comparing them.
+///
+/// Clones that aren't type II clones are moved into separate clone groups.
+/// In contrast to the RecursiveCloneTypeIIHashConstraint, all clones in a clone
+/// group are guaranteed to be be type II clones of each other, but it is too
+/// slow to efficiently handle large amounts of clones.
+class RecursiveCloneTypeIIVerifyConstraint {
 public:
   void constrain(std::vector<CloneDetector::CloneGroup> &Sequences);
 };
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D34182: [analyzer]... Raphael Isemann via Phabricator via cfe-commits

Reply via email to