johannes created this revision.

This adds another pass to the matching algorithm that tries to match nodes with
common parents, if they have similar values.


https://reviews.llvm.org/D36687

Files:
  include/clang/Tooling/ASTDiff/ASTDiff.h
  lib/Tooling/ASTDiff/ASTDiff.cpp
  test/Tooling/clang-diff-bottomup.cpp
  test/Tooling/clang-diff-heuristics.cpp
  test/Tooling/clang-diff-opt.cpp
  tools/clang-diff/ClangDiff.cpp

Index: tools/clang-diff/ClangDiff.cpp
===================================================================
--- tools/clang-diff/ClangDiff.cpp
+++ tools/clang-diff/ClangDiff.cpp
@@ -535,7 +535,9 @@
   if (!StopAfter.empty()) {
     if (StopAfter == "topdown")
       Options.StopAfterTopDown = true;
-    else if (StopAfter != "bottomup") {
+    else if (StopAfter == "bottomup")
+      Options.StopAfterBottomUp = true;
+    else {
       llvm::errs() << "Error: Invalid argument for -stop-after\n";
       return 1;
     }
Index: test/Tooling/clang-diff-opt.cpp
===================================================================
--- test/Tooling/clang-diff-opt.cpp
+++ test/Tooling/clang-diff-opt.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -E %s > %t.src.cpp
 // RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
-// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+// RUN: clang-diff -dump-matches -s=10 -stop-after=bottomup %t.src.cpp %t.dst.cpp -- | FileCheck %s
 //
 // Test the behaviour of the matching according to the optimal tree edit
 // distance, implemented with Zhang and Shasha's algorithm.
@@ -41,5 +41,5 @@
   // CHECK: Delete NullStmt(22)
   ;; {{;;;;;;}}
 }
- 
+
 #endif
Index: test/Tooling/clang-diff-heuristics.cpp
===================================================================
--- /dev/null
+++ test/Tooling/clang-diff-heuristics.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -E %s > %t.src.cpp
+// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
+// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+//
+// Test the heuristics, with maxsize set to 0, so that the optimal matching will never be applied.
+
+#ifndef DEST
+
+void f1() {;}
+
+void f2(int) {;}
+
+#else
+
+// same parents, same value
+// CHECK: Match FunctionDecl: f1(void ())(1) to FunctionDecl: f1(void ())(1)
+// CHECK: Match CompoundStmt
+void f1() {}
+
+// same parents, same identifier
+// CHECK: Match FunctionDecl: f2(void (int))(4) to FunctionDecl: f2(void ())(3)
+// CHECK: Match CompoundStmt
+void f2() {}
+
+#endif
Index: test/Tooling/clang-diff-bottomup.cpp
===================================================================
--- test/Tooling/clang-diff-bottomup.cpp
+++ test/Tooling/clang-diff-bottomup.cpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -E %s > %t.src.cpp
 // RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST
-// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s
+// RUN: clang-diff -dump-matches -s=0 -stop-after=bottomup %t.src.cpp %t.dst.cpp -- | FileCheck %s
 //
 // Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied.
 
Index: lib/Tooling/ASTDiff/ASTDiff.cpp
===================================================================
--- lib/Tooling/ASTDiff/ASTDiff.cpp
+++ lib/Tooling/ASTDiff/ASTDiff.cpp
@@ -94,15 +94,23 @@
   // Descendants are only considered to be equal when they are mapped in M.
   double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
 
+  double getNodeSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const;
+
   // Returns the node that has the highest degree of similarity.
   NodeId findCandidate(const Mapping &M, NodeId Id1) const;
 
+  NodeId findCandidateFromChildren(const Mapping &M, NodeId Id1,
+                                   NodeId P2) const;
+
   // Returns a mapping of identical subtrees.
   Mapping matchTopDown() const;
 
   // Tries to match any yet unmapped nodes, in a bottom-up fashion.
   void matchBottomUp(Mapping &M) const;
 
+  // Matches nodes, whose parents are matched.
+  void matchChildren(Mapping &M);
+
   const ComparisonOptions &Options;
 
   friend class ZhangShashaMatcher;
@@ -828,6 +836,23 @@
   return CommonDescendants / Denominator;
 }
 
+double ASTDiff::Impl::getNodeSimilarity(const Mapping &M, NodeId Id1,
+                                        NodeId Id2) const {
+  const Node &N1 = T1.getNode(Id1);
+  const Node &N2 = T2.getNode(Id2);
+  auto Ident1 = N1.getIdentifier(), Ident2 = N2.getIdentifier();
+
+  bool SameValue = T1.getNodeValue(Id1) == T2.getNodeValue(Id2);
+  auto SameIdent = Ident1 && Ident2 && *Ident1 == *Ident2;
+
+  double NodeSimilarity = 0;
+  NodeSimilarity += SameValue;
+  NodeSimilarity += SameIdent;
+
+  assert(haveSameParents(M, Id1, Id2));
+  return NodeSimilarity * Options.MinSimilarity;
+}
+
 NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const {
   NodeId Candidate;
   double HighestSimilarity = 0.0;
@@ -845,6 +870,25 @@
   return Candidate;
 }
 
+NodeId ASTDiff::Impl::findCandidateFromChildren(const Mapping &M, NodeId Id1,
+                                                NodeId P2) const {
+  NodeId Candidate;
+  double HighestSimilarity = 0.0;
+  for (NodeId Id2 : T2.getNode(P2).Children) {
+    if (!isMatchingPossible(Id1, Id2))
+      continue;
+    if (M.hasDst(Id2))
+      continue;
+    double Similarity = getJaccardSimilarity(M, Id1, Id2);
+    Similarity += getNodeSimilarity(M, Id1, Id2);
+    if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) {
+      HighestSimilarity = Similarity;
+      Candidate = Id2;
+    }
+  }
+  return Candidate;
+}
+
 void ASTDiff::Impl::matchBottomUp(Mapping &M) const {
   std::vector<NodeId> Postorder = getSubtreePostorder(T1, T1.getRootId());
   for (NodeId Id1 : Postorder) {
@@ -871,6 +915,22 @@
   }
 }
 
+void ASTDiff::Impl::matchChildren(Mapping &M) {
+  for (NodeId Id1 : T1) {
+    NodeId P1 = T1.getNode(Id1).Parent;
+    if (P1.isInvalid() || !M.hasSrc(P1))
+      continue;
+    if (M.hasSrc(Id1))
+      continue;
+    NodeId P2 = M.getDst(P1);
+    NodeId Id2 = findCandidateFromChildren(M, Id1, P2);
+    if (Id2.isValid()) {
+      M.link(Id1, Id2);
+      addOptimalMapping(M, Id1, Id2);
+    }
+  }
+}
+
 Mapping ASTDiff::Impl::matchTopDown() const {
   PriorityList L1(T1);
   PriorityList L2(T2);
@@ -928,6 +988,9 @@
   if (Options.StopAfterTopDown)
     return;
   matchBottomUp(TheMapping);
+  if (Options.StopAfterBottomUp)
+    return;
+  matchChildren(TheMapping);
 }
 
 void ASTDiff::Impl::computeChangeKinds(Mapping &M) {
Index: include/clang/Tooling/ASTDiff/ASTDiff.h
===================================================================
--- include/clang/Tooling/ASTDiff/ASTDiff.h
+++ include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -114,6 +114,7 @@
   int MaxSize = 100;
 
   bool StopAfterTopDown = false;
+  bool StopAfterBottomUp = false;
 
   /// Returns false if the nodes should never be matched.
   bool isMatchingAllowed(const Node &N1, const Node &N2) const {
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to