Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
This revision was automatically updated to reflect the committed changes. Closed by commit rL276782: [analyzer] Add basic capabilities to detect source code clones. (authored by dergachev). Changed prior to commit: https://reviews.llvm.org/D20795?vs=65506&id=65563#toc Repository: rL LLVM https://reviews.llvm.org/D20795 Files: cfe/trunk/include/clang/Analysis/CloneDetection.h cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td cfe/trunk/lib/Analysis/CMakeLists.txt cfe/trunk/lib/Analysis/CloneDetection.cpp cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt cfe/trunk/lib/StaticAnalyzer/Checkers/CloneChecker.cpp cfe/trunk/test/Analysis/copypaste/blocks.cpp cfe/trunk/test/Analysis/copypaste/false-positives.cpp cfe/trunk/test/Analysis/copypaste/functions.cpp cfe/trunk/test/Analysis/copypaste/objc-methods.m cfe/trunk/test/Analysis/copypaste/sub-sequences.cpp Index: cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td === --- cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td +++ cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td @@ -77,6 +77,8 @@ def LLVM : Package<"llvm">; def Debug : Package<"debug">; +def CloneDetectionAlpha : Package<"clone">, InPackage, Hidden; + //===--===// // Core Checkers. //===--===// @@ -661,3 +663,17 @@ DescFile<"DebugCheckers.cpp">; } // end "debug" + + +//===--===// +// Clone Detection +//===--===// + +let ParentPackage = CloneDetectionAlpha in { + +def CloneChecker : Checker<"CloneChecker">, + HelpText<"Reports similar pieces of code.">, + DescFile<"CloneChecker.cpp">; + +} // end "clone" + Index: cfe/trunk/include/clang/Analysis/CloneDetection.h === --- cfe/trunk/include/clang/Analysis/CloneDetection.h +++ cfe/trunk/include/clang/Analysis/CloneDetection.h @@ -0,0 +1,235 @@ +//===--- CloneDetection.h - Finds code clones in an AST -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// /file +/// This file defines classes for searching and anlyzing source code clones. +/// +//===--===// + +#ifndef LLVM_CLANG_AST_CLONEDETECTION_H +#define LLVM_CLANG_AST_CLONEDETECTION_H + +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/StringMap.h" + +#include + +namespace clang { + +class Stmt; +class Decl; +class ASTContext; +class CompoundStmt; + +/// \brief Identifies a list of statements. +/// +/// Can either identify a single arbitrary Stmt object, a continuous sequence of +/// child statements inside a CompoundStmt or no statements at all. +class StmtSequence { + /// If this object identifies a sequence of statements inside a CompoundStmt, + /// S points to this CompoundStmt. If this object only identifies a single + /// Stmt, then S is a pointer to this Stmt. + const Stmt *S; + + /// The related ASTContext for S. + ASTContext *Context; + + /// If EndIndex is non-zero, then S is a CompoundStmt and this StmtSequence + /// instance is representing the CompoundStmt children inside the array + /// [StartIndex, EndIndex). + unsigned StartIndex; + unsigned EndIndex; + +public: + /// \brief Constructs a StmtSequence holding multiple statements. + /// + /// The resulting StmtSequence identifies a continuous sequence of statements + /// in the body of the given CompoundStmt. Which statements of the body should + /// be identified needs to be specified by providing a start and end index + /// that describe a non-empty sub-array in the body of the given CompoundStmt. + /// + /// \param Stmt A CompoundStmt that contains all statements in its body. + /// \param Context The ASTContext for the given CompoundStmt. + /// \param StartIndex The inclusive start index in the children array of + /// \p Stmt + /// \param EndIndex The exclusive end index in the children array of \p Stmt. + StmtSequence(const CompoundStmt *Stmt, ASTContext &Context, + unsigned StartIndex, unsigned EndIndex); + + /// \brief Constructs a StmtSequence holding a single statement. + /// + /// \param Stmt An arbitrary Stmt. + /// \param Context The ASTContext for the given Stmt. + StmtSequence(const Stmt *Stmt, ASTContext &Context); + + /// \brief Constructs an empty StmtSequence. + StmtSequence(); + + typedef const Stmt *const *iterator; + + /// Returns an iterator pointing to the
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
NoQ accepted this revision. NoQ added a reviewer: NoQ. NoQ added a comment. Great! Will commit. https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 65506. teemperor added a comment. - Now passing ChildSignatures by const reference. https://reviews.llvm.org/D20795 Files: include/clang/Analysis/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/Analysis/CMakeLists.txt lib/Analysis/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/blocks.cpp test/Analysis/copypaste/false-positives.cpp test/Analysis/copypaste/functions.cpp test/Analysis/copypaste/objc-methods.m test/Analysis/copypaste/sub-sequences.cpp Index: test/Analysis/copypaste/sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/sub-sequences.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if sub-sequences can match with normal sequences. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/objc-methods.m === --- /dev/null +++ test/Analysis/copypaste/objc-methods.m @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in Objective-C methods. + +@interface A +- (int) setOk : (int) a : (int) b; +@end + +@implementation A +- (int) setOk : (int) a : (int) b { // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} +@end + +@interface B +- (int) setOk : (int) a : (int) b; +@end + +@implementation B +- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} +@end Index: test/Analysis/copypaste/functions.cpp === --- /dev/null +++ test/Analysis/copypaste/functions.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in functions. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) +return x; + return y; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/false-positives.cpp === --- /dev/null +++ test/Analysis/copypaste/false-positives.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This test contains false-positive reports from the CloneChecker that need to +// be fixed. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} Index: test/Analysis/copypaste/blocks.cpp === --- /dev/null +++ test/Analysis/copypaste/blocks.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in blocks. + +void log(); + +auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +}; + +auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +}; Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,96 @@ +//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +//
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
NoQ added inline comments. Comment at: lib/Analysis/CloneDetection.cpp:148 @@ +147,3 @@ + +// FIXME: This function has quadratic runtime right now. Check if skipping +// this function for too long CompoundStmts is an option. I've a feeling this comment was forgotten :o https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor marked 3 inline comments as done. teemperor added a comment. https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor added inline comments. Comment at: lib/Analysis/CloneDetection.cpp:178 @@ +177,3 @@ + + bool VisitFunctionDecl(FunctionDecl *D) { +// If we found a function, we start the clone search on its body statement. NoQ wrote: > You'd probably want to add `ObjCMethodDecl` and `BlockDecl`, because they > aren't sub-classes of `FunctionDecl` (and probably even tests for that). > > Because this part of the code essentially re-implements `AnalysisConsumer`, > (a `RecursiveASTVisitor` that tells us what code bodies to analyze with the > static analyzer - i should've looked there earlier!!). > > Alternatively, you can just rely on `AnalysisConsumer`, which would eliminate > the recursive visitor completely: {F2205103} This way you'd be analyzing > exactly the same code bodies that the analyzer itself would analyze; if you'd > want to extend to various declarations, you'd be able to do that by > subscribing on `check::ASTDecl`. But i dare not to predict what kind of > different flexibility you'd need next. Thanks for the patch! https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 65441. teemperor added a comment. - The CloneGroup values in StringMap no longer store a copy of their own key. https://reviews.llvm.org/D20795 Files: include/clang/Analysis/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/Analysis/CMakeLists.txt lib/Analysis/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/blocks.cpp test/Analysis/copypaste/false-positives.cpp test/Analysis/copypaste/functions.cpp test/Analysis/copypaste/objc-methods.m test/Analysis/copypaste/sub-sequences.cpp Index: test/Analysis/copypaste/sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/sub-sequences.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if sub-sequences can match with normal sequences. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/objc-methods.m === --- /dev/null +++ test/Analysis/copypaste/objc-methods.m @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in Objective-C methods. + +@interface A +- (int) setOk : (int) a : (int) b; +@end + +@implementation A +- (int) setOk : (int) a : (int) b { // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} +@end + +@interface B +- (int) setOk : (int) a : (int) b; +@end + +@implementation B +- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} +@end Index: test/Analysis/copypaste/functions.cpp === --- /dev/null +++ test/Analysis/copypaste/functions.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in functions. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) +return x; + return y; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/false-positives.cpp === --- /dev/null +++ test/Analysis/copypaste/false-positives.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This test contains false-positive reports from the CloneChecker that need to +// be fixed. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} Index: test/Analysis/copypaste/blocks.cpp === --- /dev/null +++ test/Analysis/copypaste/blocks.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in blocks. + +void log(); + +auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +}; + +auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +}; Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,96 @@ +//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===---
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 65439. teemperor marked 7 inline comments as done. teemperor added a comment. - Fixed the minor problems as pointed out by Artem. - Now using AnalysisConsumer instead of RecursiveASTVisitor. - Function names are now confirming to LLVM code-style. - Renamed DataCollectVisitor to CloneSignatureGenerator (as it's no longer following the Visitor pattern). - Added tests for clone detection in ObjC-methods and blocks. - Moved false-positive tests into own file. https://reviews.llvm.org/D20795 Files: include/clang/Analysis/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/Analysis/CMakeLists.txt lib/Analysis/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/blocks.cpp test/Analysis/copypaste/false-positives.cpp test/Analysis/copypaste/functions.cpp test/Analysis/copypaste/objc-methods.m test/Analysis/copypaste/sub-sequences.cpp Index: test/Analysis/copypaste/sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/sub-sequences.cpp @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if sub-sequences can match with normal sequences. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/objc-methods.m === --- /dev/null +++ test/Analysis/copypaste/objc-methods.m @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -analyze -Wno-objc-root-class -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in Objective-C methods. + +@interface A +- (int) setOk : (int) a : (int) b; +@end + +@implementation A +- (int) setOk : (int) a : (int) b { // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} +@end + +@interface B +- (int) setOk : (int) a : (int) b; +@end + +@implementation B +- (int) setOk : (int) a : (int) b { // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} +@end Index: test/Analysis/copypaste/functions.cpp === --- /dev/null +++ test/Analysis/copypaste/functions.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in functions. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) +return x; + return y; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/false-positives.cpp === --- /dev/null +++ test/Analysis/copypaste/false-positives.cpp @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This test contains false-positive reports from the CloneChecker that need to +// be fixed. + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} Index: test/Analysis/copypaste/blocks.cpp === --- /dev/null +++ test/Analysis/copypaste/blocks.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -analyze -fblocks -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// This tests if we search for clones in blocks. + +void log(); + +auto BlockA = ^(int a, int b){ // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +}; + +auto BlockB = ^(int a, int b){ // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +}; Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChec
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
NoQ added a comment. The idea with strings as keys is curious! I've got a few more minor comments :) Comment at: lib/Analysis/CloneDetection.cpp:104 @@ +103,3 @@ +// Storage for the signatures of the direct child statements. This is only +// needed if the current statemnt is a CompoundStmt. +std::vector ChildSignatures; Typo: statement. Comment at: lib/Analysis/CloneDetection.cpp:147 @@ +146,3 @@ + const CompoundStmt *CS, + std::vector ChildSignatures) { + Maybe pass `ChildSignatures` by reference? Comment at: lib/Analysis/CloneDetection.cpp:178 @@ +177,3 @@ + + bool VisitFunctionDecl(FunctionDecl *D) { +// If we found a function, we start the clone search on its body statement. You'd probably want to add `ObjCMethodDecl` and `BlockDecl`, because they aren't sub-classes of `FunctionDecl` (and probably even tests for that). Because this part of the code essentially re-implements `AnalysisConsumer`, (a `RecursiveASTVisitor` that tells us what code bodies to analyze with the static analyzer - i should've looked there earlier!!). Alternatively, you can just rely on `AnalysisConsumer`, which would eliminate the recursive visitor completely: {F2205103} This way you'd be analyzing exactly the same code bodies that the analyzer itself would analyze; if you'd want to extend to various declarations, you'd be able to do that by subscribing on `check::ASTDecl`. But i dare not to predict what kind of different flexibility you'd need next. Comment at: lib/Analysis/CloneDetection.cpp:188 @@ +187,3 @@ +void CloneDetector::analyzeTranslationUnit(TranslationUnitDecl *TU) { + DataCollectVisitor visitor(*this, TU->getASTContext()); + visitor.TraverseDecl(TU); Micro-nit: Capitalize 'V' in 'visitor'? https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor marked 9 inline comments as done. Comment at: include/clang/AST/CloneDetection.h:149 @@ +148,3 @@ +/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations) +/// are not supported. +/// Put the idea on the future TODO list, but we probably can't reuse a big part of the current infrastructure for it. https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 65291. teemperor added a comment. Ok, so I think I've addressed the points from last meeting in this patch: It was planned to replace custom hashing with FoldingSetNodeID and a hashmap: In this patch I removed all custom hashing. It's now done via LLVM's StringMap and strings which brings the same functionality. Using FoldingSetNodeID is not possible because it hides it's internal data vector which StringMap doesn't support, so I've just used a normal vector instead. It was also planned to refactor CloneSignatureCache: In this patch I've completely removed the cache because it is no longer necessary. Fixing it was not an option because we assumed that the statements RecursiveASTVisitor visits are the same that Stmt::children() delivers, which is for example for LambdaExprs not true (and RecursiveASTVisitor makes no promise about that in it's API). The only sensible fix for this was to use Artem's initial suggestion to not use RecursiveASTVisitor to traverse statements. I've also adressed Artems and Vassils comments regarding documentation and code style in this patch (Thanks!). https://reviews.llvm.org/D20795 Files: include/clang/Analysis/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/Analysis/CMakeLists.txt lib/Analysis/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) +return x; + return y; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,85 @@ +//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/Analysis/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugR
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
v.g.vassilev accepted this revision. v.g.vassilev added a comment. This revision is now accepted and ready to land. LGTM, given the comments are addressed. Comment at: include/clang/Analysis/CloneDetection.h:37 @@ +36,3 @@ + /// Stmt, then S is a pointer to this Stmt. + Stmt const *S; + It is more common in the codebase to use Did you mean `const Stmt *S`. Could you update all similar occurrences to it? Comment at: lib/Analysis/CloneDetection.cpp:119 @@ +118,3 @@ + /// \brief Retrieves the CloneSignature that describes the given Sequence. + CloneDetector::CloneSignature get(StmtSequence const &S) const { +// This cache promises good lookup time for recently added CloneSignatures Same here `const StmtSequence &S` is the common style in the codebase. https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
NoQ added a comment. Regarding the cache stack - it feels easier for me to allocate a separate stack for each statement, and put the stack on stack (!) rather than having it global. This way it'd be automatically cleaned for you when VisitStmt() exits, and you'd be able to address child cache by index rather then through scanning. Comment at: lib/Analysis/CloneDetection.cpp:142 @@ +141,3 @@ +/// Afterwards, the hash values of the children are calculated into the +/// computed hash value. +class HashVisitor : public RecursiveASTVisitor { > This calculation happens in linear time and each statement is only visited a > fixed amount of times during this process. Isn't `SaveAllSubSequences()` quadratic? Comment at: lib/Analysis/CloneDetection.cpp:424 @@ +423,3 @@ + // remove any duplicates. + std::sort(IndexesToRemove.begin(), IndexesToRemove.end(), +std::greater()); I suspect that all //i//'s that make it into the `IndexesToRemove` vector are already unique and increasing, by construction. We take some //i//, see if we need to remove it, then push it at most once and take (//i// + 1). https://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor added inline comments. Comment at: include/clang/AST/CloneDetection.h:131 @@ +130,3 @@ + bool operator<(const StmtSequence &Other) const { +return std::tie(S, StartIndex, EndIndex) < + std::tie(Other.S, Other.StartIndex, Other.EndIndex); zaks.anna wrote: > We are comparing the Stmt pointers here. Does this make results > non-deterministic? > Good point. The updated patch only sorts by deterministic hash values which should fix this. Comment at: lib/AST/CMakeLists.txt:10 @@ -9,3 +9,3 @@ ASTImporter.cpp ASTTypeTraits.cpp AttrImpl.cpp zaks.anna wrote: > Again, I do not think this should be in the AST. Who the users of this will > be? If you envision users outside of Clang Static Analyzer, maybe we could > add this to lib/Analysis/. Oh, sorry! I wanted to bring this up in a meeting but it seems I've forgot to do so. The point with this being in AST is that we might want to merge the Stmt::Profile implementation with the implementation of CloneDtection. And Stmt::Profile is a part of AST AFAIK, so I think we need to stay in the same library? I don't have a good overview over the clang build infrastructure yet, so I would appreciate opinions on that. If that's something we will handle later, what would be a better place for the code? Comment at: lib/AST/CloneDetection.cpp:103 @@ +102,3 @@ +/// \endcode +class HashValue { + unsigned Value = 0; zaks.anna wrote: > This should probably be a utility instead of something specific to this > checker. Do we have something similar to this in LLVM already? Yes, there is similar builtin hashing available in llvm::hashing, but the API is designed to work with iterator pairs. So we either have to record all the data in a vector and hash that (which is what FoldingSetNodeID does) or use code that isn't exposed in the API (llvm::hashing::detail::hash_state). The first option creates unnecessary overhead and the second option would require either patching LLVM or using code that isn't part of the public API. I updated the patch with something that uses FoldingSetNodeID for now. I'll update it later if we can land a patch in LLVM that adds an API for hashing without recording all necessary data beforehand. http://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 63851. teemperor marked 18 inline comments as done. teemperor added a comment. - Checker is now in the alpha package and hidden. - MinGroupComplexity is now a parameter for the checker. - StmtData is now called CloneSignature. - Replaced the std::map inside CloneDetector with a vector and a small cache in the HashVisitor. - Moved code from AST to Analysis. - Fixed multiple other smaller problems pointed out in the review. (Thanks, Vassil, Anna and Artem!) http://reviews.llvm.org/D20795 Files: include/clang/Analysis/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/Analysis/CMakeLists.txt lib/Analysis/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=alpha.clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int x, int y) { // expected-note{{Related code clone is here.}} + log(); + if (x > y) +return x; + return y; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { // no-warning + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,80 @@ +//===--- CloneChecker.cpp - Clone detection checker -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/Analysis/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + + int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger( + "MinimumCloneComplexity", 10, this); + + assert(MinComplexity >= 0); + + SourceManager &SM = BR.getSourceManager(); + + CloneDetector CloneDetector; + CloneDetector.analyzeTranslationUnit(const_cast(TU)); + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups, MinComplexity); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDi
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
NoQ added a comment. Your code is well-written and easy to understand, and makes me want to use it on my code! Added some minor comments, and there seems to be a small logic error in the compound statement hasher. Not sure if that was already explained in detail, but i seem to agree that the only point for this project to integrate into Clang Static Analyzer is to integrate with the `BugReporter` facility - at least optionally: it would allow the reports of the clone detector checker to be gathered and viewed in a manner similar to other checkers - i.e. through scan-build/scan-view, probably CodeChecker and other tools. That should make the check easier to run and attract more users. However, the `BugReporter` mechanism is tweaked for the analyzer's path-sensitive checkers, so it'd need to be modified to suit your purpose, so in my opinion this is not critical for the initial commit. Comment at: lib/AST/CloneDetection.cpp:52 @@ +51,3 @@ + Other.getEndLoc() == getEndLoc(); + return StartIsInBounds && EndIsInBounds; +} Perhaps we could early-return when we see that `StartIsInBounds` is false (for performance, because `isBeforeInTranslationUnit` looks heavy). Comment at: lib/AST/CloneDetection.cpp:88 @@ +87,3 @@ +/// It is defined as: +/// H(D) = PrimeFactor^(N-1) * D[0] + PrimeFactor^(N-2) * D[1] + ... + D[N] +/// where It seems that the last index is off-by-one, i think you meant something like: ``` H(D) = PrimeFactor^(N-1) * D[0] + PrimeFactor^(N-2) * D[1] + ... + D[N-1] ``` Comment at: lib/AST/CloneDetection.cpp:175 @@ +174,3 @@ + ++StartIterator; +auto EndIterator = Iterator; +for (unsigned I = 0; I < Length; ++I) Shouldn't it say something like "`EndIterator = StartIterator`"? Because when `StartPos` is non-zero, in your code we get [`StartPos`, `Length`) instead of [`StartPos`, `StartPos` + `Length`). In your code, `Length` acts as some kind of `EndPos` instead. Comment at: lib/AST/CloneDetection.cpp:194 @@ +193,3 @@ + } else { +StmtSequence ChildSequence(StmtSequence(Child, Context)); + Simplifies to: ``` StmtSequence ChildSequence(Child, Context); ``` Comment at: lib/AST/CloneDetection.cpp:238 @@ +237,3 @@ + // the CompoundStmt. + auto CS = dyn_cast(CurrentStmt.front()); + if (!CS) I think that the code that deals with //computing// data for sections of compound statements (as opposed to //saving// this data) should be moved to `VisitStmt()`. Or, even better, to `VisitCompoundStmt()` - that's the whole point of having a visitor, after all. Upon expanding the complexity of the hash, you'd probably provide more special cases for special statement classes, which would all sit in their own Visit method. That's for the future though, not really critical. Comment at: lib/AST/CloneDetection.cpp:262 @@ +261,3 @@ + + CloneDetector::StmtData SubData(SubHash.getValue(), SubComplexity); + CD.add(Sequence, SubData); This code is duplicated from the beginning of the function, which synergizes with my point above: if `SaveData()` contained only that much, then it could have been re-used on both call sites. Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:40 @@ +39,3 @@ + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + `TU->getASTContext().getTranslationUnitDecl()` is always equal to `TU` - there should, in most cases, be only one TranslationUnitDecl object around, but even if there'd be more some day, i think this invariant would still hold. Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:43 @@ +42,3 @@ + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups); + An idea: because this function optionally accepts `MinGroupComplexity`, you may probably want to expose this feature as a //checker-specific option// - see `MallocChecker::IsOptimistic` as an example, shouldn't be hard. Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:45 @@ +44,3 @@ + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + zaks.anna wrote: > Is it possible to report these through BugReporter? I think that'd require changes in the BugReporter to allow easily adding extra pieces to path-insensitive reports (which is quite a wanted feature in many AST-based checkers - assuming we want more AST-based checkers to be moved into the analyzer specifically for better reporting, integration with scan-build, and stuff like that). Comment at: test/Analysis/copypaste/test-min-max.cpp:39 @@ +38,3 @@ + +int foo(int a, int b) { + return a + b; Perhaps add a `// no-warning` comment to spots that should not cause a warning? This comment has no
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
zaks.anna added a comment. Hi, Thank you for working on this! Here are several comments from a **partial** review. Can you talk a bit about how do you envision to generalize this to copy and paste detection, where the pasted code is only partially modified? Do you think this could be generalized in that way? Comment at: include/clang/AST/CloneDetection.h:20 @@ +19,3 @@ + +#include +#include Please, check if there are better data structures you could be using: http://llvm.org/docs/ProgrammersManual.html#map-like-containers-std-map-densemap-etc Comment at: include/clang/AST/CloneDetection.h:131 @@ +130,3 @@ + bool operator<(const StmtSequence &Other) const { +return std::tie(S, StartIndex, EndIndex) < + std::tie(Other.S, Other.StartIndex, Other.EndIndex); We are comparing the Stmt pointers here. Does this make results non-deterministic? Comment at: include/clang/AST/CloneDetection.h:145 @@ +144,3 @@ +/// +/// This class first needs to be provided with (possibly multiple) translation +/// units by passing them to \p AnalyzeTranslationUnitDecl . Let's not talk about multiple TU here since much more is needed to support them. Maybe we should just say that, first, given a TU, the class generates the clone identifiers (hashes?) of statements in it (by running AnalyzeTranslationUnitDecl)? The description of the method already states that it can be called multiple times. Comment at: include/clang/AST/CloneDetection.h:161 @@ +160,3 @@ + /// Holds the generated data for a StmtSequence + struct StmtData { +/// The generated hash value. StmtData is too generic of a name. Maybe it could be called something like code signature, clone signature, or code hash? Comment at: include/clang/AST/CloneDetection.h:166 @@ +165,3 @@ +/// +/// This scalar values serves as a simple way of filtering clones +/// that are too small to be reported. A greater value indicates that the serves -> serve Comment at: include/clang/AST/CloneDetection.h:186 @@ +185,3 @@ + /// \param S The given StmtSequence + /// \param Error Output parameter that is set to true if no data was generated + ///for the given StmtSequence. This can happen if the given Could this return an llvm Optional instead of the error code? Comment at: include/clang/AST/CloneDetection.h:201 @@ +200,3 @@ + /// \brief Stores the StmtSequence with its associated data in the search + ///database of this CloneDetector. + /// database -> map? I would just say "Stores the StmtSequence with its associated data to allow future querying." Comment at: include/clang/AST/CloneDetection.h:209 @@ +208,3 @@ + + /// \brief Searches all translation units in this CloneDetector for clones. + /// Please, do not talk about "all translation units" here. Maybe say something like previously seen/stored statements/code or something like that. Comment at: include/clang/AST/CloneDetection.h:217 @@ +216,3 @@ + void findClones(std::vector &Result, + unsigned MinGroupComplexity = 10); + Make 10 into a constant. It probably should be a checker parameter. Comment at: include/clang/StaticAnalyzer/Checkers/Checkers.td:668 @@ +667,3 @@ + +let ParentPackage = CloneDetection in { + This should be an "alpha" package until it's considered to be ready for end users. Comment at: lib/AST/CMakeLists.txt:10 @@ -9,3 +9,3 @@ ASTImporter.cpp ASTTypeTraits.cpp AttrImpl.cpp Again, I do not think this should be in the AST. Who the users of this will be? If you envision users outside of Clang Static Analyzer, maybe we could add this to lib/Analysis/. Comment at: lib/AST/CloneDetection.cpp:56 @@ +55,3 @@ +StmtSequence::iterator StmtSequence::begin() const { + if (holdsSequence()) { +auto CS = cast(S); http://llvm.org/docs/CodingStandards.html#use-early-exits-and-continue-to-simplify-code Comment at: lib/AST/CloneDetection.cpp:103 @@ +102,3 @@ +/// \endcode +class HashValue { + unsigned Value = 0; This should probably be a utility instead of something specific to this checker. Do we have something similar to this in LLVM already? Comment at: lib/StaticAnalyzer/Checkers/CloneChecker.cpp:45 @@ +44,3 @@ + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + Is it possible to report these through BugReporter? http://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 63266. teemperor added a comment. - StmtSequence is now treating all Stmt objects as const. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { +DiagEngine.Report(Group.front().getStartLoc(), WarnID); +for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); +} + } +} + +//===--===// +// Register CloneChecker +//===--===// + +void ento::registerCloneChecker(CheckerManager &Mg
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 63262. teemperor marked 7 inline comments as done. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { +DiagEngine.Report(Group.front().getStartLoc(), WarnID); +for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); +} + } +} + +//===--===// +// Register CloneChecker +//===--===// + +void ento::registerCloneChecker(CheckerManager &Mgr) { + Mgr.registerChecker(); +} Index: li
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 63161. teemperor marked 5 inline comments as done. teemperor added a comment. - Fixed type of StmtSequence::iterator. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { +DiagEngine.Report(Group.front().getStartLoc(), WarnID); +for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); +} + } +} + +//===--===// +// Register CloneChecker +//===--===// + +void ento::registerClone
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 63159. teemperor added a comment. - Fixed the problems pointed out by Vassil (Thanks!) - Comments now have less line breaks. - Added more documentation to HashValue, it's hash function and the used prime numbers. - Added a typedef for StmtSequence::iterator. - StmtSequence::end() is now always returning a correct iterator. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups; + CloneDetector.findClones(CloneGroups); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { +DiagEngine.Report(Group.front().getStartLoc(), WarnID); +for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); +} + } +} + +//===---
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
v.g.vassilev requested changes to this revision. This revision now requires changes to proceed. Comment at: include/clang/AST/CloneDetection.h:148 @@ +147,3 @@ +/// This class only searches for clones in exectuable source code +/// (e.g. function bodies). Other clones (e.g. cloned comments or declarations) +/// are not supported. It might be a good idea to detect cloned comments, too. Comment at: include/clang/AST/CloneDetection.h:153 @@ +152,3 @@ +/// in the given statements. +/// This is done by hashing all statements using a locality-sensitive hash +/// function that generates identical hash values for similar statement Can you move this on the previous line? Comment at: include/clang/AST/CloneDetection.h:166 @@ +165,3 @@ +/// that are too small to be reported. +/// A greater value indicates that the related StmtSequence is probably +/// more interesting to the user. This should go on prev line too. Please check all comments for this pattern. Comment at: include/clang/AST/CloneDetection.h:215 @@ +214,3 @@ + /// StmtSequences that were identified to be clones of each other. + std::vector findClones(unsigned MinGroupComplexity = 10); + We shouldn't copy (or hope this would be std::moved). Can you pass it as a reference to the argument list? Comment at: lib/AST/CloneDetection.cpp:22 @@ +21,3 @@ + +StmtSequence::StmtSequence(CompoundStmt *Stmt, ASTContext *Context, + unsigned StartIndex, unsigned EndIndex) It is a very common pattern in clang the ASTContext to be passed as ASTContext&, this should simplify the body of the constructors. Comment at: lib/AST/CloneDetection.cpp:45 @@ +44,3 @@ + // surround the other sequence. + bool StartIsInBounds = Context->getSourceManager().isBeforeInTranslationUnit( + getStartLoc(), Other.getStartLoc()) || I'd factor out the Context->getSourceManager() in a local variable. Comment at: lib/AST/CloneDetection.cpp:54 @@ +53,3 @@ + +Stmt *const *StmtSequence::begin() const { + if (holdsSequence()) { I am not sure what is the intent but shouldn't that be `Stmt const* const*`? Comment at: lib/AST/CloneDetection.cpp:56 @@ +55,3 @@ + if (holdsSequence()) { +auto CS = static_cast(S); +return CS->body_begin() + StartIndex; Please use `auto CS = cast(S);` This logic will crash on `void f() try {} catch(...){}` In this case we do not generate a CompoundStmt. Comment at: lib/AST/CloneDetection.cpp:64 @@ +63,3 @@ + if (holdsSequence()) { +auto CS = static_cast(S); +return CS->body_begin() + EndIndex; Same as above. Comment at: lib/AST/CloneDetection.cpp:84 @@ +83,3 @@ + void addData(unsigned Data) { +Value *= 53; +Value += Data; Still a floating magic constant. Could you factor it out in a variable with a meaningful name? Comment at: lib/AST/CloneDetection.cpp:112 @@ +111,3 @@ + + // We need to traverse postorder over the AST for our algorithm + // to work as each parent expects that its children were already hashed. Doxygen style /// ? Comment at: lib/AST/CloneDetection.cpp:139 @@ +138,3 @@ +// Iterate over each child in the sub-sequence. +for (auto I = StartIterator; I != EndIterator; ++I) { + Stmt *Child = *I; Could we use a range-based for loop. This looks odd. Comment at: lib/AST/CloneDetection.cpp:161 @@ +160,3 @@ + void SaveData(StmtSequence CurrentStmt, HashValue Hash, + unsigned Complexity); + Indent. Comment at: lib/AST/CloneDetection.cpp:164 @@ +163,3 @@ + CloneDetector &CD; + ASTContext &Context; +}; Could you move these members above. I don't think this is common in the codebase. Comment at: lib/AST/CloneDetection.cpp:293 @@ +292,3 @@ + // contains another group, we only need to return the bigger group. + for (unsigned I = 0; I < Result.size(); ++I) { +for (unsigned J = 0; J < Result.size(); ++J) { Capital letters are more common when naming iterators. What about small `i` and `j` here. And down you can use `I` and you won't need to break the line. http://reviews.llvm.org/D20795 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 62899. teemperor added a comment. - Fixed a few typos in comments and documentation. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups = + CloneDetector.findClones(); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { +DiagEngine.Report(Group.front().getStartLoc(), WarnID); +for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); +} + } +} + +//===--===// +// Register CloneChecker +//===--===// + +void ento::registerCloneChecker(CheckerManager &Mgr) { + Mgr.r
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor updated this revision to Diff 62888. teemperor added a comment. - Using doxygen-style comments for all private members. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups = + CloneDetector.findClones(); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note, + "Related code clone is here."); + + for (CloneDetector::CloneGroup &Group : CloneGroups) { +DiagEngine.Report(Group.front().getStartLoc(), WarnID); +for (unsigned J = 1; J < Group.size(); ++J) { + DiagEngine.Report(Group[J].getStartLoc(), NoteID); +} + } +} + +//===--===// +// Register CloneChecker +//===--===// + +void ento::registerCloneChecker(CheckerManager &Mgr) { +
Re: [PATCH] D20795: Added basic capabilities to detect source code clones.
teemperor retitled this revision from "Added ASTStructure for analyzing the structure of Stmts." to "Added basic capabilities to detect source code clones.". teemperor updated the summary for this revision. teemperor updated this revision to Diff 62877. teemperor added a comment. - Patch now only provides basic generic hashing and is smaller. - Code style is now respecting LLVM/clang guidelines. - Documented all non-trivial functions. - Moved testing from unittests to normal tests. - Added tests for false-positives. - Fixed the problems pointed out by Vassil (beside "Is that the LLVM/Clang common notion for documenting private members: (i.e. doxygen disabled) instead of /"). - No longer patching SourceManager.h. http://reviews.llvm.org/D20795 Files: include/clang/AST/CloneDetection.h include/clang/StaticAnalyzer/Checkers/Checkers.td lib/AST/CMakeLists.txt lib/AST/CloneDetection.cpp lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/CloneChecker.cpp test/Analysis/copypaste/test-min-max.cpp test/Analysis/copypaste/test-sub-sequences.cpp Index: test/Analysis/copypaste/test-sub-sequences.cpp === --- /dev/null +++ test/Analysis/copypaste/test-sub-sequences.cpp @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +// We test if sub-sequences can match with normal sequences containing only +// a single statement. + +void log2(int a); +void log(); + +int max(int a, int b) { + log2(a); + log(); // expected-warning{{Detected code clone.}} + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { + log(); // expected-note{{Related code clone is here.}} + if (a > b) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: test/Analysis/copypaste/test-min-max.cpp === --- /dev/null +++ test/Analysis/copypaste/test-min-max.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -analyze -std=c++11 -analyzer-checker=clone.CloneChecker -verify %s + +void log(); + +int max(int a, int b) { // expected-warning{{Detected code clone.}} + log(); + if (a > b) +return a; + return b; +} + +int maxClone(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a > b) +return a; + return b; +} + +// False positives below. These clones should not be reported. + +// FIXME: Detect different binary operator kinds. +int min1(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (a < b) +return a; + return b; +} + +// FIXME: Detect different variable patterns. +int min2(int a, int b) { // expected-note{{Related code clone is here.}} + log(); + if (b > a) +return a; + return b; +} + +// Functions below are not clones and should not be reported. + +int foo(int a, int b) { + return a + b; +} Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp === --- /dev/null +++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp @@ -0,0 +1,67 @@ +//===--- CloneDetection.cpp - Clone detection checkers --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +/// +/// \file +/// CloneChecker is a checker that reports clones in the current translation +/// unit. +/// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/AST/CloneDetection.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace ento; + +namespace { +class CloneChecker : public Checker { + +public: + void checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, BugReporter &BR) const; +}; +} // end anonymous namespace + +void CloneChecker::checkEndOfTranslationUnit(const TranslationUnitDecl *TU, + AnalysisManager &Mgr, + BugReporter &BR) const { + CloneDetector CloneDetector; + CloneDetector.AnalyzeTranslationUnitDecl( + TU->getASTContext().getTranslationUnitDecl()); + + std::vector CloneGroups = + CloneDetector.findClones(); + + DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic(); + + unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning, + "Detected code clone."); + + unsigned NoteID = DiagEngine.getCustomDiagID(Diagnos