Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-26 Thread Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL276782: [analyzer] Add basic capabilities to detect source 
code clones. (authored by dergachev).

Changed prior to commit:
  https://reviews.llvm.org/D20795?vs=65506&id=65563#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D20795

Files:
  cfe/trunk/include/clang/Analysis/CloneDetection.h
  cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
  cfe/trunk/lib/Analysis/CMakeLists.txt
  cfe/trunk/lib/Analysis/CloneDetection.cpp
  cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt
  cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  cfe/trunk/test/Analysis/copypaste/blocks.cpp
  cfe/trunk/test/Analysis/copypaste/false-positives.cpp
  cfe/trunk/test/Analysis/copypaste/functions.cpp
  cfe/trunk/test/Analysis/copypaste/objc-methods.m
  cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp

Index: cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
===
--- cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -77,6 +77,8 @@
 def LLVM : Package<"llvm">;
 def Debug : Package<"debug">;
 
+def CloneDetectionAlpha : Package<"clone">, InPackage, Hidden;
+
 //===--===//
 // Core Checkers.
 //===--===//
@@ -661,3 +663,17 @@
   DescFile<"DebugCheckers.cpp">;
 
 } // end "debug"
+
+
+//===--===//
+// Clone Detection
+//===--===//
+
+let ParentPackage = CloneDetectionAlpha in {
+
+def CloneChecker : Checker<"CloneChecker">,
+  HelpText<"Reports similar pieces of code.">,
+  DescFile<"CloneChecker.cpp">;
+
+} // end "clone"
+
Index: cfe/trunk/include/clang/Analysis/CloneDetection.h
===
--- cfe/trunk/include/clang/Analysis/CloneDetection.h
+++ cfe/trunk/include/clang/Analysis/CloneDetection.h
@@ -0,0 +1,235 @@
+//===--- CloneDetection.h - Finds code clones in an AST -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// /file
+/// This file defines classes for searching and anlyzing source code clones.
+///
+//===--===//
+
+#ifndef LLVM_CLANG_AST_CLONEDETECTION_H
+#define LLVM_CLANG_AST_CLONEDETECTION_H
+
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringMap.h"
+
+#include 
+
+namespace clang {
+
+class Stmt;
+class Decl;
+class ASTContext;
+class CompoundStmt;
+
+/// \brief Identifies a list of statements.
+///
+/// Can either identify a single arbitrary Stmt object, a continuous sequence of
+/// child statements inside a CompoundStmt or no statements at all.
+class StmtSequence {
+  /// If this object identifies a sequence of statements inside a CompoundStmt,
+  /// S points to this CompoundStmt. If this object only identifies a single
+  /// Stmt, then S is a pointer to this Stmt.
+  const Stmt *S;
+
+  /// The related ASTContext for S.
+  ASTContext *Context;
+
+  /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence
+  /// instance is representing the CompoundStmt children inside the array
+  /// [StartIndex, EndIndex).
+  unsigned StartIndex;
+  unsigned EndIndex;
+
+public:
+  /// \brief Constructs a StmtSequence holding multiple statements.
+  ///
+  /// The resulting StmtSequence identifies a continuous sequence of statements
+  /// in the body of the given CompoundStmt. Which statements of the body should
+  /// be identified needs to be specified by providing a start and end index
+  /// that describe a non-empty sub-array in the body of the given CompoundStmt.
+  ///
+  /// \param Stmt A CompoundStmt that contains all statements in its body.
+  /// \param Context The ASTContext for the given CompoundStmt.
+  /// \param StartIndex The inclusive start index in the children array of
+  ///   \p Stmt
+  /// \param EndIndex The exclusive end index in the children array of \p Stmt.
+  StmtSequence(const CompoundStmt *Stmt, ASTContext &Context,
+   unsigned StartIndex, unsigned EndIndex);
+
+  /// \brief Constructs a StmtSequence holding a single statement.
+  ///
+  /// \param Stmt An arbitrary Stmt.
+  /// \param Context The ASTContext for the given Stmt.
+  StmtSequence(const Stmt *Stmt, ASTContext &Context);
+
+  /// \brief Constructs an empty StmtSequence.
+  StmtSequence();
+
+  typedef const Stmt *const *iterator;
+
+  /// Returns an iterator pointing to the 

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-26 Thread Artem Dergachev via cfe-commits
NoQ accepted this revision.
NoQ added a reviewer: NoQ.
NoQ added a comment.

Great! Will commit.


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-26 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 65506.
teemperor added a comment.

- Now passing ChildSignatures by const reference.


https://reviews.llvm.org/D20795

Files:
  include/clang/Analysis/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/Analysis/CMakeLists.txt
  lib/Analysis/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/blocks.cpp
  test/Analysis/copypaste/false-positives.cpp
  test/Analysis/copypaste/functions.cpp
  test/Analysis/copypaste/objc-methods.m
  test/Analysis/copypaste/sub-sequences.cpp

Index: test/Analysis/copypaste/sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/sub-sequences.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if sub-sequences can match with normal sequences.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/objc-methods.m
===
--- /dev/null
+++ test/Analysis/copypaste/objc-methods.m
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in Objective-C methods.
+
+@interface A
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation A
+- (int) setOk : (int) a : (int) b {  // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+@end
+
+@interface B
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation B
+- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+@end
Index: test/Analysis/copypaste/functions.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/functions.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in functions.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
+  log();
+  if (x > y)
+return x;
+  return y;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/false-positives.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/false-positives.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This test contains false-positive reports from the CloneChecker that need to
+// be fixed.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
Index: test/Analysis/copypaste/blocks.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/blocks.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in blocks.
+
+void log();
+
+auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+};
+
+auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+};
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,96 @@
+//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+//

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-26 Thread Artem Dergachev via cfe-commits
NoQ added inline comments.


Comment at: lib/Analysis/CloneDetection.cpp:148
@@ +147,3 @@
+
+// FIXME: This function has quadratic runtime right now. Check if skipping
+// this function for too long CompoundStmts is an option.

I've a feeling this comment was forgotten :o


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-25 Thread Raphael Isemann via cfe-commits
teemperor marked 3 inline comments as done.
teemperor added a comment.

https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-25 Thread Raphael Isemann via cfe-commits
teemperor added inline comments.


Comment at: lib/Analysis/CloneDetection.cpp:178
@@ +177,3 @@
+
+  bool VisitFunctionDecl(FunctionDecl *D) {
+// If we found a function, we start the clone search on its body statement.

NoQ wrote:
> You'd probably want to add `ObjCMethodDecl` and `BlockDecl`, because they 
> aren't sub-classes of `FunctionDecl` (and probably even tests for that).
> 
> Because this part of the code essentially re-implements `AnalysisConsumer`, 
> (a `RecursiveASTVisitor` that tells us what code bodies to analyze with the 
> static analyzer - i should've looked there earlier!!).
> 
> Alternatively, you can just rely on `AnalysisConsumer`, which would eliminate 
> the recursive visitor completely: {F2205103} This way you'd be analyzing 
> exactly the same code bodies that the analyzer itself would analyze; if you'd 
> want to extend to various declarations, you'd be able to do that by 
> subscribing on `check::ASTDecl`. But i dare not to predict what kind of 
> different flexibility you'd need next.
Thanks for the patch!


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-25 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 65441.
teemperor added a comment.

- The CloneGroup values in StringMap no longer store a copy of their own key.


https://reviews.llvm.org/D20795

Files:
  include/clang/Analysis/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/Analysis/CMakeLists.txt
  lib/Analysis/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/blocks.cpp
  test/Analysis/copypaste/false-positives.cpp
  test/Analysis/copypaste/functions.cpp
  test/Analysis/copypaste/objc-methods.m
  test/Analysis/copypaste/sub-sequences.cpp

Index: test/Analysis/copypaste/sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/sub-sequences.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if sub-sequences can match with normal sequences.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/objc-methods.m
===
--- /dev/null
+++ test/Analysis/copypaste/objc-methods.m
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in Objective-C methods.
+
+@interface A
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation A
+- (int) setOk : (int) a : (int) b {  // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+@end
+
+@interface B
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation B
+- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+@end
Index: test/Analysis/copypaste/functions.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/functions.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in functions.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
+  log();
+  if (x > y)
+return x;
+  return y;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/false-positives.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/false-positives.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This test contains false-positive reports from the CloneChecker that need to
+// be fixed.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
Index: test/Analysis/copypaste/blocks.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/blocks.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in blocks.
+
+void log();
+
+auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+};
+
+auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+};
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,96 @@
+//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-25 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 65439.
teemperor marked 7 inline comments as done.
teemperor added a comment.

- Fixed the minor problems as pointed out by Artem.
- Now using AnalysisConsumer instead of RecursiveASTVisitor.
- Function names are now confirming to LLVM code-style.
- Renamed DataCollectVisitor to CloneSignatureGenerator (as it's no longer 
following the Visitor pattern).
- Added tests for clone detection in ObjC-methods and blocks.
- Moved false-positive tests into own file.


https://reviews.llvm.org/D20795

Files:
  include/clang/Analysis/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/Analysis/CMakeLists.txt
  lib/Analysis/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/blocks.cpp
  test/Analysis/copypaste/false-positives.cpp
  test/Analysis/copypaste/functions.cpp
  test/Analysis/copypaste/objc-methods.m
  test/Analysis/copypaste/sub-sequences.cpp

Index: test/Analysis/copypaste/sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/sub-sequences.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if sub-sequences can match with normal sequences.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/objc-methods.m
===
--- /dev/null
+++ test/Analysis/copypaste/objc-methods.m
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in Objective-C methods.
+
+@interface A
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation A
+- (int) setOk : (int) a : (int) b {  // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+@end
+
+@interface B
+- (int) setOk : (int) a : (int) b;
+@end
+
+@implementation B
+- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+@end
Index: test/Analysis/copypaste/functions.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/functions.cpp
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in functions.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
+  log();
+  if (x > y)
+return x;
+  return y;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/false-positives.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/false-positives.cpp
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This test contains false-positive reports from the CloneChecker that need to
+// be fixed.
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
Index: test/Analysis/copypaste/blocks.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/blocks.cpp
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// This tests if we search for clones in blocks.
+
+void log();
+
+auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+};
+
+auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+};
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChec

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-25 Thread Artem Dergachev via cfe-commits
NoQ added a comment.

The idea with strings as keys is curious! I've got a few more minor comments :)



Comment at: lib/Analysis/CloneDetection.cpp:104
@@ +103,3 @@
+// Storage for the signatures of the direct child statements. This is only
+// needed if the current statemnt is a CompoundStmt.
+std::vector ChildSignatures;

Typo: statement.


Comment at: lib/Analysis/CloneDetection.cpp:147
@@ +146,3 @@
+  const CompoundStmt *CS,
+  std::vector ChildSignatures) {
+

Maybe pass `ChildSignatures` by reference?


Comment at: lib/Analysis/CloneDetection.cpp:178
@@ +177,3 @@
+
+  bool VisitFunctionDecl(FunctionDecl *D) {
+// If we found a function, we start the clone search on its body statement.

You'd probably want to add `ObjCMethodDecl` and `BlockDecl`, because they 
aren't sub-classes of `FunctionDecl` (and probably even tests for that).

Because this part of the code essentially re-implements `AnalysisConsumer`, (a 
`RecursiveASTVisitor` that tells us what code bodies to analyze with the static 
analyzer - i should've looked there earlier!!).

Alternatively, you can just rely on `AnalysisConsumer`, which would eliminate 
the recursive visitor completely: {F2205103} This way you'd be analyzing 
exactly the same code bodies that the analyzer itself would analyze; if you'd 
want to extend to various declarations, you'd be able to do that by subscribing 
on `check::ASTDecl`. But i dare not to predict what kind of different 
flexibility you'd need next.


Comment at: lib/Analysis/CloneDetection.cpp:188
@@ +187,3 @@
+void CloneDetector::analyzeTranslationUnit(TranslationUnitDecl *TU) {
+  DataCollectVisitor visitor(*this, TU->getASTContext());
+  visitor.TraverseDecl(TU);

Micro-nit: Capitalize 'V' in 'visitor'?


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-24 Thread Raphael Isemann via cfe-commits
teemperor marked 9 inline comments as done.


Comment at: include/clang/AST/CloneDetection.h:149
@@ +148,3 @@
+/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
+/// are not supported.
+///

Put the idea on the future TODO list, but we probably can't reuse a big part of 
the current infrastructure for it.


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-24 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 65291.
teemperor added a comment.

Ok, so I think I've addressed the points from last meeting in this patch:

It was planned to replace custom hashing with FoldingSetNodeID and a hashmap: 
In this patch I removed all custom hashing. It's now done via LLVM's StringMap 
and strings which brings the same functionality. Using FoldingSetNodeID is not 
possible because it hides it's internal data vector which StringMap doesn't 
support, so I've just used a normal vector instead.

It was also planned to refactor CloneSignatureCache: In this patch I've 
completely removed the cache because it is no longer necessary. Fixing it was 
not an option because we assumed that the statements RecursiveASTVisitor visits 
are the same that Stmt::children() delivers, which is for example for 
LambdaExprs not true (and RecursiveASTVisitor makes no promise about that in 
it's API). The only sensible fix for this was to use Artem's initial suggestion 
to not use RecursiveASTVisitor to traverse statements.

I've also adressed Artems and Vassils comments regarding documentation and code 
style in this patch (Thanks!).


https://reviews.llvm.org/D20795

Files:
  include/clang/Analysis/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/Analysis/CMakeLists.txt
  lib/Analysis/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
+  log();
+  if (x > y)
+return x;
+  return y;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,85 @@
+//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/Analysis/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugR

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-22 Thread Vassil Vassilev via cfe-commits
v.g.vassilev accepted this revision.
v.g.vassilev added a comment.
This revision is now accepted and ready to land.

LGTM, given the comments are addressed.



Comment at: include/clang/Analysis/CloneDetection.h:37
@@ +36,3 @@
+  /// Stmt, then S is a pointer to this Stmt.
+  Stmt const *S;
+

It is more common in the codebase to use Did you mean `const Stmt *S`. Could 
you update all similar occurrences to it?


Comment at: lib/Analysis/CloneDetection.cpp:119
@@ +118,3 @@
+  /// \brief Retrieves the CloneSignature that describes the given Sequence.
+  CloneDetector::CloneSignature get(StmtSequence const &S) const {
+// This cache promises good lookup time for recently added CloneSignatures

Same here `const StmtSequence &S` is the common style in the codebase.


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-22 Thread Artem Dergachev via cfe-commits
NoQ added a comment.

Regarding the cache stack - it feels easier for me to allocate a separate stack 
for each statement, and put the stack on stack (!) rather than having it 
global. This way it'd be automatically cleaned for you when VisitStmt() exits, 
and you'd be able to address child cache by index rather then through scanning.



Comment at: lib/Analysis/CloneDetection.cpp:142
@@ +141,3 @@
+/// Afterwards, the hash values of the children are calculated into the
+/// computed hash value.
+class HashVisitor : public RecursiveASTVisitor {

> This calculation happens in linear time and each statement is only visited a
> fixed amount of times during this process.

Isn't `SaveAllSubSequences()` quadratic?


Comment at: lib/Analysis/CloneDetection.cpp:424
@@ +423,3 @@
+  // remove any duplicates.
+  std::sort(IndexesToRemove.begin(), IndexesToRemove.end(),
+std::greater());

I suspect that all //i//'s that make it into the `IndexesToRemove` vector are 
already unique and increasing, by construction. We take some //i//, see if we 
need to remove it, then push it at most once and take (//i// + 1).


https://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-13 Thread Raphael Isemann via cfe-commits
teemperor added inline comments.


Comment at: include/clang/AST/CloneDetection.h:131
@@ +130,3 @@
+  bool operator<(const StmtSequence &Other) const {
+return std::tie(S, StartIndex, EndIndex) <
+   std::tie(Other.S, Other.StartIndex, Other.EndIndex);

zaks.anna wrote:
> We are comparing the Stmt pointers here. Does this make results 
> non-deterministic?
> 
Good point. The updated patch only sorts by deterministic hash values which 
should fix this.


Comment at: lib/AST/CMakeLists.txt:10
@@ -9,3 +9,3 @@
   ASTImporter.cpp
   ASTTypeTraits.cpp
   AttrImpl.cpp

zaks.anna wrote:
> Again, I do not think this should be in the AST. Who the users of this will 
> be? If you envision users outside of Clang Static Analyzer, maybe we could 
> add this to lib/Analysis/.
Oh, sorry! I wanted to bring this up in a meeting but it seems I've forgot to 
do so.

The point with this being in AST is that we might want to merge the 
Stmt::Profile implementation with the implementation of CloneDtection. And 
Stmt::Profile is a part of AST AFAIK, so I think we need to stay in the same 
library? I don't have a good overview over the clang build infrastructure yet, 
so I would appreciate opinions on that.

If that's something we will handle later, what would be a better place for the 
code?


Comment at: lib/AST/CloneDetection.cpp:103
@@ +102,3 @@
+/// \endcode
+class HashValue {
+  unsigned Value = 0;

zaks.anna wrote:
> This should probably be a utility instead of something specific to this 
> checker. Do we have something similar to this in LLVM already?
Yes, there is similar builtin hashing available in llvm::hashing, but the API 
is designed to work with iterator pairs. So we either have to record all the 
data in a vector and hash that (which is what FoldingSetNodeID does) or use 
code that isn't exposed in the API (llvm::hashing::detail::hash_state). The 
first option creates unnecessary overhead and the second option would require 
either patching LLVM or using code that isn't part of the public API.

I updated the patch with something that uses FoldingSetNodeID for now. I'll 
update it later if we can land a patch in LLVM that adds an API for hashing 
without recording all necessary data beforehand.


http://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-13 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 63851.
teemperor marked 18 inline comments as done.
teemperor added a comment.

- Checker is now in the alpha package and hidden.
- MinGroupComplexity is now a parameter for the checker.
- StmtData is now called CloneSignature.
- Replaced the std::map inside CloneDetector with a vector and a small cache in 
the HashVisitor.
- Moved code from AST to Analysis.
- Fixed multiple other smaller problems pointed out in the review. (Thanks, 
Vassil, Anna and Artem!)


http://reviews.llvm.org/D20795

Files:
  include/clang/Analysis/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/Analysis/CMakeLists.txt
  lib/Analysis/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int x, int y) { // expected-note{{Related code clone is here.}}
+  log();
+  if (x > y)
+return x;
+  return y;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) { // no-warning
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,80 @@
+//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/Analysis/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+
+  int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger(
+  "MinimumCloneComplexity", 10, this);
+
+  assert(MinComplexity >= 0);
+
+  SourceManager &SM = BR.getSourceManager();
+
+  CloneDetector CloneDetector;
+  CloneDetector.analyzeTranslationUnit(const_cast(TU));
+
+  std::vector CloneGroups;
+  CloneDetector.findClones(CloneGroups, MinComplexity);
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDi

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-11 Thread Artem Dergachev via cfe-commits
NoQ added a comment.

Your code is well-written and easy to understand, and makes me want to use it 
on my code! Added some minor comments, and there seems to be a small logic 
error in the compound statement hasher.

Not sure if that was already explained in detail, but i seem to agree that the 
only point for this project to integrate into Clang Static Analyzer is to 
integrate with the `BugReporter` facility - at least optionally: it would allow 
the reports of the clone detector checker to be gathered and viewed in a manner 
similar to other checkers - i.e. through scan-build/scan-view, probably 
CodeChecker and other tools. That should make the check easier to run and 
attract more users. However, the `BugReporter` mechanism is tweaked for the 
analyzer's path-sensitive checkers, so it'd need to be modified to suit your 
purpose, so in my opinion this is not critical for the initial commit.



Comment at: lib/AST/CloneDetection.cpp:52
@@ +51,3 @@
+  Other.getEndLoc() == getEndLoc();
+  return StartIsInBounds && EndIsInBounds;
+}

Perhaps we could early-return when we see that `StartIsInBounds` is false (for 
performance, because `isBeforeInTranslationUnit` looks heavy).


Comment at: lib/AST/CloneDetection.cpp:88
@@ +87,3 @@
+/// It is defined as:
+///   H(D) = PrimeFactor^(N-1) * D[0] + PrimeFactor^(N-2) * D[1] + ... + D[N]
+/// where

It seems that the last index is off-by-one, i think you meant something like:

```
H(D) = PrimeFactor^(N-1) * D[0] + PrimeFactor^(N-2) * D[1] + ... + D[N-1]
```


Comment at: lib/AST/CloneDetection.cpp:175
@@ +174,3 @@
+  ++StartIterator;
+auto EndIterator = Iterator;
+for (unsigned I = 0; I < Length; ++I)

Shouldn't it say something like "`EndIterator = StartIterator`"? Because when 
`StartPos` is non-zero, in your code we get [`StartPos`, `Length`) instead of 
[`StartPos`, `StartPos` + `Length`). In your code, `Length` acts as some kind 
of `EndPos` instead.


Comment at: lib/AST/CloneDetection.cpp:194
@@ +193,3 @@
+  } else {
+StmtSequence ChildSequence(StmtSequence(Child, Context));
+

Simplifies to:

```
StmtSequence ChildSequence(Child, Context);
```


Comment at: lib/AST/CloneDetection.cpp:238
@@ +237,3 @@
+  // the CompoundStmt.
+  auto CS = dyn_cast(CurrentStmt.front());
+  if (!CS)

I think that the code that deals with //computing// data for sections of 
compound statements (as opposed to //saving// this data) should be moved to 
`VisitStmt()`. Or, even better, to `VisitCompoundStmt()` - that's the whole 
point of having a visitor, after all. Upon expanding the complexity of the 
hash, you'd probably provide more special cases for special statement classes, 
which would all sit in their own Visit method.

That's for the future though, not really critical.


Comment at: lib/AST/CloneDetection.cpp:262
@@ +261,3 @@
+
+  CloneDetector::StmtData SubData(SubHash.getValue(), SubComplexity);
+  CD.add(Sequence, SubData);

This code is duplicated from the beginning of the function, which synergizes 
with my point above: if `SaveData()` contained only that much, then it could 
have been re-used on both call sites.


Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:40
@@ +39,3 @@
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+

`TU->getASTContext().getTranslationUnitDecl()` is always equal to `TU` - there 
should, in most cases, be only one TranslationUnitDecl object around, but even 
if there'd be more some day, i think this invariant would still hold. 


Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:43
@@ +42,3 @@
+  std::vector CloneGroups;
+  CloneDetector.findClones(CloneGroups);
+

An idea: because this function optionally accepts `MinGroupComplexity`, you may 
probably want to expose this feature as a //checker-specific option// - see 
`MallocChecker::IsOptimistic` as an example, shouldn't be hard.


Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:45
@@ +44,3 @@
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+

zaks.anna wrote:
> Is it possible to report these through BugReporter?
I think that'd require changes in the BugReporter to allow easily adding extra 
pieces to path-insensitive reports (which is quite a wanted feature in many 
AST-based checkers - assuming we want more AST-based checkers to be moved into 
the analyzer specifically for better reporting, integration with scan-build, 
and stuff like that).


Comment at: test/Analysis/copypaste/test-min-max.cpp:39
@@ +38,3 @@
+
+int foo(int a, int b) {
+  return a + b;

Perhaps add a `// no-warning` comment to spots that should not cause a warning? 
This comment has no

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-11 Thread Anna Zaks via cfe-commits
zaks.anna added a comment.

Hi,

Thank you for working on this!

Here are several comments from a **partial** review.

Can you talk a bit about how do you envision to generalize this to copy and 
paste detection, where the pasted code is only partially modified? Do you think 
this could be generalized in that way?



Comment at: include/clang/AST/CloneDetection.h:20
@@ +19,3 @@
+
+#include 
+#include 

Please, check if there are better data structures you could be using: 
http://llvm.org/docs/ProgrammersManual.html#map-like-containers-std-map-densemap-etc


Comment at: include/clang/AST/CloneDetection.h:131
@@ +130,3 @@
+  bool operator<(const StmtSequence &Other) const {
+return std::tie(S, StartIndex, EndIndex) <
+   std::tie(Other.S, Other.StartIndex, Other.EndIndex);

We are comparing the Stmt pointers here. Does this make results 
non-deterministic?



Comment at: include/clang/AST/CloneDetection.h:145
@@ +144,3 @@
+///
+/// This class first needs to be provided with (possibly multiple) translation
+/// units by passing them to \p AnalyzeTranslationUnitDecl .

Let's not talk about multiple TU here since much more is needed to support 
them. Maybe we should just say that, first, given a TU, the class generates the 
clone identifiers (hashes?) of statements in it (by running 
AnalyzeTranslationUnitDecl)?

The description of the method already states that it can be called multiple 
times.


Comment at: include/clang/AST/CloneDetection.h:161
@@ +160,3 @@
+  /// Holds the generated data for a StmtSequence
+  struct StmtData {
+/// The generated hash value.

StmtData is too generic of a name. Maybe it could be called something like code 
signature, clone signature, or code hash?


Comment at: include/clang/AST/CloneDetection.h:166
@@ +165,3 @@
+///
+/// This scalar values serves as a simple way of filtering clones
+/// that are too small to be reported. A greater value indicates that the

serves -> serve


Comment at: include/clang/AST/CloneDetection.h:186
@@ +185,3 @@
+  /// \param S The given StmtSequence
+  /// \param Error Output parameter that is set to true if no data was 
generated
+  ///for the given StmtSequence. This can happen if the given

Could this return an llvm Optional instead of the error code?


Comment at: include/clang/AST/CloneDetection.h:201
@@ +200,3 @@
+  /// \brief Stores the StmtSequence with its associated data in the search
+  ///database of this CloneDetector.
+  ///

database -> map?

I would just say "Stores the StmtSequence with its associated data to allow 
future querying."


Comment at: include/clang/AST/CloneDetection.h:209
@@ +208,3 @@
+
+  /// \brief Searches all translation units in this CloneDetector for clones.
+  ///

Please, do not talk about "all translation units" here. Maybe say something 
like previously seen/stored statements/code or something like that.


Comment at: include/clang/AST/CloneDetection.h:217
@@ +216,3 @@
+  void findClones(std::vector &Result,
+  unsigned MinGroupComplexity = 10);
+

Make 10 into a constant. It probably should be a checker parameter. 


Comment at: include/clang/StaticAnalyzer/Checkers/Checkers.td:668
@@ +667,3 @@
+
+let ParentPackage = CloneDetection in {
+

This should be an "alpha" package until it's considered to be ready for end 
users.


Comment at: lib/AST/CMakeLists.txt:10
@@ -9,3 +9,3 @@
   ASTImporter.cpp
   ASTTypeTraits.cpp
   AttrImpl.cpp

Again, I do not think this should be in the AST. Who the users of this will be? 
If you envision users outside of Clang Static Analyzer, maybe we could add this 
to lib/Analysis/.


Comment at: lib/AST/CloneDetection.cpp:56
@@ +55,3 @@
+StmtSequence::iterator StmtSequence::begin() const {
+  if (holdsSequence()) {
+auto CS = cast(S);

http://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code


Comment at: lib/AST/CloneDetection.cpp:103
@@ +102,3 @@
+/// \endcode
+class HashValue {
+  unsigned Value = 0;

This should probably be a utility instead of something specific to this 
checker. Do we have something similar to this in LLVM already?


Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:45
@@ +44,3 @@
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+

Is it possible to report these through BugReporter?


http://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-08 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 63266.
teemperor added a comment.

- StmtSequence is now treating all Stmt objects as const.


http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups;
+  CloneDetector.findClones(CloneGroups);
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+   "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+for (unsigned J = 1; J < Group.size(); ++J) {
+  DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+}
+  }
+}
+
+//===--===//
+// Register CloneChecker
+//===--===//
+
+void ento::registerCloneChecker(CheckerManager &Mg

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-08 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 63262.
teemperor marked 7 inline comments as done.

http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups;
+  CloneDetector.findClones(CloneGroups);
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+   "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+for (unsigned J = 1; J < Group.size(); ++J) {
+  DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+}
+  }
+}
+
+//===--===//
+// Register CloneChecker
+//===--===//
+
+void ento::registerCloneChecker(CheckerManager &Mgr) {
+  Mgr.registerChecker();
+}
Index: li

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-07 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 63161.
teemperor marked 5 inline comments as done.
teemperor added a comment.

- Fixed type of StmtSequence::iterator.


http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups;
+  CloneDetector.findClones(CloneGroups);
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+   "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+for (unsigned J = 1; J < Group.size(); ++J) {
+  DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+}
+  }
+}
+
+//===--===//
+// Register CloneChecker
+//===--===//
+
+void ento::registerClone

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-07 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 63159.
teemperor added a comment.

- Fixed the problems pointed out by Vassil (Thanks!)
- Comments now have less line breaks.
- Added more documentation to HashValue, it's hash function and the used prime 
numbers.
- Added a typedef for StmtSequence::iterator.
- StmtSequence::end() is now always returning a correct iterator.


http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups;
+  CloneDetector.findClones(CloneGroups);
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+   "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+for (unsigned J = 1; J < Group.size(); ++J) {
+  DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+}
+  }
+}
+
+//===---

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-07 Thread Vassil Vassilev via cfe-commits
v.g.vassilev requested changes to this revision.
This revision now requires changes to proceed.


Comment at: include/clang/AST/CloneDetection.h:148
@@ +147,3 @@
+/// This class only searches for clones in exectuable source code
+/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations)
+/// are not supported.

It might be a good idea to detect cloned comments, too.


Comment at: include/clang/AST/CloneDetection.h:153
@@ +152,3 @@
+/// in the given statements.
+/// This is done by hashing all statements using a locality-sensitive hash
+/// function that generates identical hash values for similar statement

Can you move this on the previous line?


Comment at: include/clang/AST/CloneDetection.h:166
@@ +165,3 @@
+/// that are too small to be reported.
+/// A greater value indicates that the related StmtSequence is probably
+/// more interesting to the user.

This should go on prev line too. Please check all comments for this pattern.


Comment at: include/clang/AST/CloneDetection.h:215
@@ +214,3 @@
+  /// StmtSequences that were identified to be clones of each other.
+  std::vector findClones(unsigned MinGroupComplexity = 10);
+

We shouldn't copy (or hope this would be std::moved). Can you pass it as a 
reference to the argument list?


Comment at: lib/AST/CloneDetection.cpp:22
@@ +21,3 @@
+
+StmtSequence::StmtSequence(CompoundStmt *Stmt, ASTContext *Context,
+   unsigned StartIndex, unsigned EndIndex)

It is a very common pattern in clang the ASTContext to be passed as 
ASTContext&, this should simplify the body of the constructors.


Comment at: lib/AST/CloneDetection.cpp:45
@@ +44,3 @@
+  // surround the other sequence.
+  bool StartIsInBounds = Context->getSourceManager().isBeforeInTranslationUnit(
+ getStartLoc(), Other.getStartLoc()) ||

I'd factor out the Context->getSourceManager() in a local variable.


Comment at: lib/AST/CloneDetection.cpp:54
@@ +53,3 @@
+
+Stmt *const *StmtSequence::begin() const {
+  if (holdsSequence()) {

I am not sure what is the intent but shouldn't that be `Stmt const* const*`?


Comment at: lib/AST/CloneDetection.cpp:56
@@ +55,3 @@
+  if (holdsSequence()) {
+auto CS = static_cast(S);
+return CS->body_begin() + StartIndex;

Please use `auto CS = cast(S);`

This logic will crash on `void f() try {} catch(...){}` In this case we do not 
generate a CompoundStmt.


Comment at: lib/AST/CloneDetection.cpp:64
@@ +63,3 @@
+  if (holdsSequence()) {
+auto CS = static_cast(S);
+return CS->body_begin() + EndIndex;

Same as above.


Comment at: lib/AST/CloneDetection.cpp:84
@@ +83,3 @@
+  void addData(unsigned Data) {
+Value *= 53;
+Value += Data;

Still a floating magic constant. Could you factor it out in a variable with a 
meaningful name?


Comment at: lib/AST/CloneDetection.cpp:112
@@ +111,3 @@
+
+  // We need to traverse postorder over the AST for our algorithm
+  // to work as each parent expects that its children were already hashed.

Doxygen style /// ?


Comment at: lib/AST/CloneDetection.cpp:139
@@ +138,3 @@
+// Iterate over each child in the sub-sequence.
+for (auto I = StartIterator; I != EndIterator; ++I) {
+  Stmt *Child = *I;

Could we use a range-based for loop. This looks odd.


Comment at: lib/AST/CloneDetection.cpp:161
@@ +160,3 @@
+  void SaveData(StmtSequence CurrentStmt, HashValue Hash,
+   unsigned Complexity);
+

Indent.


Comment at: lib/AST/CloneDetection.cpp:164
@@ +163,3 @@
+  CloneDetector &CD;
+  ASTContext &Context;
+};

Could you move these members above. I don't think this is common in the 
codebase.


Comment at: lib/AST/CloneDetection.cpp:293
@@ +292,3 @@
+  // contains another group, we only need to return the bigger group.
+  for (unsigned I = 0; I < Result.size(); ++I) {
+for (unsigned J = 0; J < Result.size(); ++J) {

Capital letters are more common when naming iterators. What about small `i` and 
`j` here. And down you can use `I` and you won't need to break the line.


http://reviews.llvm.org/D20795



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-06 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 62899.
teemperor added a comment.

- Fixed a few typos in comments and documentation.


http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups =
+  CloneDetector.findClones();
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+   "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+for (unsigned J = 1; J < Group.size(); ++J) {
+  DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+}
+  }
+}
+
+//===--===//
+// Register CloneChecker
+//===--===//
+
+void ento::registerCloneChecker(CheckerManager &Mgr) {
+  Mgr.r

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-06 Thread Raphael Isemann via cfe-commits
teemperor updated this revision to Diff 62888.
teemperor added a comment.

- Using doxygen-style comments for all private members.


http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups =
+  CloneDetector.findClones();
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+   "Related code clone is here.");
+
+  for (CloneDetector::CloneGroup &Group : CloneGroups) {
+DiagEngine.Report(Group.front().getStartLoc(), WarnID);
+for (unsigned J = 1; J < Group.size(); ++J) {
+  DiagEngine.Report(Group[J].getStartLoc(), NoteID);
+}
+  }
+}
+
+//===--===//
+// Register CloneChecker
+//===--===//
+
+void ento::registerCloneChecker(CheckerManager &Mgr) {
+  

Re: [PATCH] D20795: Added basic capabilities to detect source code clones.

2016-07-06 Thread Raphael Isemann via cfe-commits
teemperor retitled this revision from "Added ASTStructure for analyzing the 
structure of Stmts." to "Added basic capabilities to detect source code 
clones.".
teemperor updated the summary for this revision.
teemperor updated this revision to Diff 62877.
teemperor added a comment.

- Patch now only provides basic generic hashing and is smaller.
- Code style is now respecting LLVM/clang guidelines.
- Documented all non-trivial functions.
- Moved testing from unittests to normal tests.
- Added tests for false-positives.
- Fixed the problems pointed out by Vassil (beside "Is that the LLVM/Clang 
common notion for documenting private members: (i.e. doxygen disabled) instead 
of /").
- No longer patching SourceManager.h.


http://reviews.llvm.org/D20795

Files:
  include/clang/AST/CloneDetection.h
  include/clang/StaticAnalyzer/Checkers/Checkers.td
  lib/AST/CMakeLists.txt
  lib/AST/CloneDetection.cpp
  lib/StaticAnalyzer/Checkers/CMakeLists.txt
  lib/StaticAnalyzer/Checkers/CloneChecker.cpp
  test/Analysis/copypaste/test-min-max.cpp
  test/Analysis/copypaste/test-sub-sequences.cpp

Index: test/Analysis/copypaste/test-sub-sequences.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-sub-sequences.cpp
@@ -0,0 +1,28 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+// We test if sub-sequences can match with normal sequences containing only
+// a single statement.
+
+void log2(int a);
+void log();
+
+int max(int a, int b) {
+  log2(a);
+  log(); // expected-warning{{Detected code clone.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) {
+  log(); // expected-note{{Related code clone is here.}}
+  if (a > b)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: test/Analysis/copypaste/test-min-max.cpp
===
--- /dev/null
+++ test/Analysis/copypaste/test-min-max.cpp
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s
+
+void log();
+
+int max(int a, int b) { // expected-warning{{Detected code clone.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+int maxClone(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a > b)
+return a;
+  return b;
+}
+
+// False positives below. These clones should not be reported.
+
+// FIXME: Detect different binary operator kinds.
+int min1(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (a < b)
+return a;
+  return b;
+}
+
+// FIXME: Detect different variable patterns.
+int min2(int a, int b) { // expected-note{{Related code clone is here.}}
+  log();
+  if (b > a)
+return a;
+  return b;
+}
+
+// Functions below are not clones and should not be reported.
+
+int foo(int a, int b) {
+  return a + b;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===
--- /dev/null
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -0,0 +1,67 @@
+//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--===//
+///
+/// \file
+/// CloneChecker is a checker that reports clones in the current translation
+/// unit.
+///
+//===--===//
+
+#include "ClangSACheckers.h"
+#include "clang/AST/CloneDetection.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/StaticAnalyzer/Core/Checker.h"
+#include "clang/StaticAnalyzer/Core/CheckerManager.h"
+#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
+
+using namespace clang;
+using namespace ento;
+
+namespace {
+class CloneChecker : public Checker {
+
+public:
+  void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr, BugReporter &BR) const;
+};
+} // end anonymous namespace
+
+void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
+ AnalysisManager &Mgr,
+ BugReporter &BR) const {
+  CloneDetector CloneDetector;
+  CloneDetector.AnalyzeTranslationUnitDecl(
+  TU->getASTContext().getTranslationUnitDecl());
+
+  std::vector CloneGroups =
+  CloneDetector.findClones();
+
+  DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+  unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+   "Detected code clone.");
+
+  unsigned NoteID = DiagEngine.getCustomDiagID(Diagnos