teemperor created this revision.
teemperor added reviewers: v.g.vassilev, NoQ, zaks.anna.
teemperor added subscribers: cfe-commits, vsk.
One of the goals of the project was to find bugs caused by copy-pasting, which
happen when a piece of code is copied but not all variables in this piece of
code are correctly adapted to the new environment (e.g. like this 'if
(mutex1.try_lock()) { foo(); mutex2.unlock(); }' where the mutex2 variable is a
leftover from the original source code).
This patch adds support vor analyzing the pattern of the referenced variables
for similar errors. So far only 'one-off' errors are supported, i.e. pattern
errors where only one variable is not following the pattern of its related
clones.
Additionally, the CloneChecker - which is currently the primary user of the
CloneDetector API - now also supports reporting these suspicious clones.
https://reviews.llvm.org/D23314
Files:
include/clang/Analysis/CloneDetection.h
lib/Analysis/CloneDetection.cpp
lib/StaticAnalyzer/Checkers/CloneChecker.cpp
test/Analysis/copypaste/suspicious-clones.cpp
Index: test/Analysis/copypaste/suspicious-clones.cpp
===================================================================
--- /dev/null
+++ test/Analysis/copypaste/suspicious-clones.cpp
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -analyze -analyzer-checker=alpha.clone.CloneChecker -analyzer-config alpha.clone.CloneChecker:ReportSuspiciousClones=true -analyzer-config alpha.clone.CloneChecker:ReportNormalClones=false -verify %s
+
+// Tests finding a suspicious clone that references local variables.
+
+void log();
+
+int max(int a, int b) {
+ log();
+ if (a > b)
+ return a;
+ return b; // expected-note{{Suggestion is based on the useage of this variable in a similar piece of code.}}
+}
+
+int maxClone(int x, int y, int z) {
+ log();
+ if (x > y)
+ return x;
+ return z; // expected-warning{{Maybe you wanted to use 'y' here?}}
+}
+
+// Tests finding a suspicious clone that references global variables.
+
+struct mutex {
+ bool try_lock();
+ void unlock();
+};
+
+mutex m1;
+mutex m2;
+int i;
+
+void busyIncrement() {
+ while (true) {
+ if (m1.try_lock()) {
+ ++i;
+ m1.unlock(); // expected-note{{Suggestion is based on the useage of this variable in a similar piece of code.}}
+ if (i > 1000) {
+ return;
+ }
+ }
+ }
+}
+
+void faultyBusyIncrement() {
+ while (true) {
+ if (m1.try_lock()) {
+ ++i;
+ m2.unlock(); // expected-warning{{Maybe you wanted to use 'm1' here?}}
+ if (i > 1000) {
+ return;
+ }
+ }
+ }
+}
+
+// Tests that we provide two suggestions in cases where two fixes are possible.
+
+int foo(int a, int b, int c) {
+ a += b + c;
+ b /= a + b;
+ c -= b * a; // expected-warning{{Maybe you wanted to use 'a' here?}}
+ return c;
+}
+
+int fooClone(int a, int b, int c) {
+ a += b + c;
+ b /= a + b;
+ c -= a * a; // expected-note{{Or maybe you wanted to use 'b' here in this similar piece of code?}}
+ return c;
+}
+
+
+// Tests that for clone groups with a many possible suspicious clone pairs, at
+// most one warning per clone group is generated and every relevant clone is
+// reported through either a warning or a note.
+
+long bar1(long a, long b, long c, long d) {
+ c = a - b;
+ c = c / d * a;
+ d = b * b - c; // expected-warning{{Maybe you wanted to use 'c' here?}}
+ return d;
+}
+
+long bar2(long a, long b, long c, long d) {
+ c = a - b;
+ c = c / d * a;
+ d = c * b - c; // expected-note{{Or maybe you wanted to use 'b' here in this similar piece of code?}} \
+ // expected-warning{{Maybe you wanted to use 'a' here?}}
+ return d;
+}
+
+long bar3(long a, long b, long c, long d) {
+ c = a - b;
+ c = c / d * a;
+ d = a * b - c; // expected-note{{Or maybe you wanted to use 'c' here in this similar piece of code?}}
+ return d;
+}
Index: lib/StaticAnalyzer/Checkers/CloneChecker.cpp
===================================================================
--- lib/StaticAnalyzer/Checkers/CloneChecker.cpp
+++ lib/StaticAnalyzer/Checkers/CloneChecker.cpp
@@ -34,6 +34,83 @@
void checkEndOfTranslationUnit(const TranslationUnitDecl *TU,
AnalysisManager &Mgr, BugReporter &BR) const;
+
+ /// \brief Reports all clones to the user.
+ void reportClones(SourceManager &SM, AnalysisManager &Mgr,
+ int MinComplexity) const {
+
+ std::vector<CloneDetector::CloneGroup> CloneGroups;
+ CloneDetector.findClones(CloneGroups, MinComplexity);
+
+ DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+ unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
+ "Detected code clone.");
+
+ unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
+ "Related code clone is here.");
+
+ for (CloneDetector::CloneGroup &Group : CloneGroups) {
+ // For readability reasons we sort the clones by line numbers.
+ std::sort(Group.Sequences.begin(), Group.Sequences.end(),
+ [&SM](const StmtSequence &LHS, const StmtSequence &RHS) {
+ return SM.isBeforeInTranslationUnit(LHS.getStartLoc(),
+ RHS.getStartLoc()) &&
+ SM.isBeforeInTranslationUnit(LHS.getEndLoc(),
+ RHS.getEndLoc());
+ });
+
+ // We group the clones by printing the first as a warning and all others
+ // as a note.
+ DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
+ for (unsigned i = 1; i < Group.Sequences.size(); ++i) {
+ DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID);
+ }
+ }
+ }
+
+ /// \brief Reports only suspicious clones to the user along with informaton
+ /// that explain why they are suspicious.
+ void reportSuspiciousClones(SourceManager &SM, AnalysisManager &Mgr,
+ int MinComplexity) const {
+
+ std::vector<CloneDetector::SuspiciousClonePair> Clones;
+ CloneDetector.findSuspiciousClones(Clones, MinComplexity);
+
+ DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+
+ auto WarnID = DiagEngine.getCustomDiagID(
+ DiagnosticsEngine::Warning, "Maybe you wanted to use %0 here?");
+
+ auto NoteID = DiagEngine.getCustomDiagID(
+ DiagnosticsEngine::Note, "Suggestion is based on the useage of this "
+ "variable in a similar piece of code.");
+
+ auto NoteWithSuggestionID = DiagEngine.getCustomDiagID(
+ DiagnosticsEngine::Note, "Or maybe you wanted to use %0 here in this "
+ "similar piece of code?");
+
+ for (CloneDetector::SuspiciousClonePair &Pair : Clones) {
+ // The first clone always has a suggestion and we report it to the user
+ // along with the place where the suggestion should be used.
+ DiagEngine.Report(Pair.FirstClone.Location.getBegin(), WarnID)
+ << Pair.FirstClone.Location << Pair.FirstClone.Suggestion;
+
+ // The second clone can have a suggestion and if there is one, we report
+ // that suggestion to the user.
+ if (Pair.SecondClone.Suggestion) {
+ DiagEngine.Report(Pair.SecondClone.Location.getBegin(),
+ NoteWithSuggestionID)
+ << Pair.SecondClone.Location << Pair.SecondClone.Suggestion;
+ continue;
+ }
+
+ // If there isn't a suggestion in the second clone, we only inform the
+ // user where we got the idea that his code could contain an error.
+ DiagEngine.Report(Pair.SecondClone.Location.getBegin(), NoteID)
+ << Pair.SecondClone.Location;
+ }
+ }
};
} // end anonymous namespace
@@ -52,39 +129,19 @@
int MinComplexity = Mgr.getAnalyzerOptions().getOptionAsInteger(
"MinimumCloneComplexity", 10, this);
-
assert(MinComplexity >= 0);
- SourceManager &SM = BR.getSourceManager();
-
- std::vector<CloneDetector::CloneGroup> CloneGroups;
- CloneDetector.findClones(CloneGroups, MinComplexity);
-
- DiagnosticsEngine &DiagEngine = Mgr.getDiagnostic();
+ bool ReportSuspiciousClones = Mgr.getAnalyzerOptions().getBooleanOption(
+ "ReportSuspiciousClones", true, this);
- unsigned WarnID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Warning,
- "Detected code clone.");
+ bool ReportNormalClones = Mgr.getAnalyzerOptions().getBooleanOption(
+ "ReportNormalClones", true, this);
- unsigned NoteID = DiagEngine.getCustomDiagID(DiagnosticsEngine::Note,
- "Related code clone is here.");
+ if (ReportSuspiciousClones)
+ reportSuspiciousClones(BR.getSourceManager(), Mgr, MinComplexity);
- for (CloneDetector::CloneGroup &Group : CloneGroups) {
- // For readability reasons we sort the clones by line numbers.
- std::sort(Group.Sequences.begin(), Group.Sequences.end(),
- [&SM](const StmtSequence &LHS, const StmtSequence &RHS) {
- return SM.isBeforeInTranslationUnit(LHS.getStartLoc(),
- RHS.getStartLoc()) &&
- SM.isBeforeInTranslationUnit(LHS.getEndLoc(),
- RHS.getEndLoc());
- });
-
- // We group the clones by printing the first as a warning and all others
- // as a note.
- DiagEngine.Report(Group.Sequences.front().getStartLoc(), WarnID);
- for (unsigned i = 1; i < Group.Sequences.size(); ++i) {
- DiagEngine.Report(Group.Sequences[i].getStartLoc(), NoteID);
- }
- }
+ if (ReportNormalClones)
+ reportClones(BR.getSourceManager(), Mgr, MinComplexity);
}
//===----------------------------------------------------------------------===//
Index: lib/Analysis/CloneDetection.cpp
===================================================================
--- lib/Analysis/CloneDetection.cpp
+++ lib/Analysis/CloneDetection.cpp
@@ -88,8 +88,11 @@
struct VariableOccurence {
/// The index of the associated VarDecl in the Variables vector.
size_t KindID;
+ /// The location in the code where the variable was references.
+ SourceRange Location;
- VariableOccurence(size_t KindID) : KindID(KindID) {}
+ VariableOccurence(size_t KindID, SourceRange Location)
+ : KindID(KindID), Location(Location) {}
};
/// All occurences of referenced variables in the order of appearance.
@@ -100,19 +103,20 @@
/// \brief Adds a new variable referenced to this pattern.
/// \param VarDecl The declaration of the variable that is referenced.
- void addVariableOccurence(const VarDecl *VarDecl) {
+ /// \param Location The SourceRange where this variable is referenced.
+ void addVariableOccurence(const VarDecl *VarDecl, SourceRange Location) {
// First check if we already reference this variable
for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) {
if (Variables[KindIndex] == VarDecl) {
// If yes, add a new occurence that points to the existing entry in
// the Variables vector.
- Occurences.emplace_back(KindIndex);
+ Occurences.emplace_back(KindIndex, Location);
return;
}
}
// If this variable wasn't already referenced, add it to the list of
// referenced variables and add a occurence that points to this new entry.
- Occurences.emplace_back(Variables.size());
+ Occurences.emplace_back(Variables.size(), Location);
Variables.push_back(VarDecl);
}
@@ -127,7 +131,7 @@
// Check if S is a reference to a variable. If yes, add it to the pattern.
if (auto D = dyn_cast<DeclRefExpr>(S)) {
if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl()))
- addVariableOccurence(VD);
+ addVariableOccurence(VD, D->getSourceRange());
}
// Recursively check all children of the given statement.
@@ -146,31 +150,89 @@
/// \brief Compares this pattern with the given one.
/// \param Other The given VariablePattern to compare with.
- /// \return Returns true if and only if the references variables in this
- /// object follow the same pattern than the ones in the given
- /// VariablePattern.
+ /// \param FirstMismatch Output parameter that will be filled with information
+ /// about the first difference between the two patterns. This parameter
+ /// can be a nullptr, in which case it will be ignored.
+ /// \return Returns the number of differences between the pattern this object
+ /// is following and the given VariablePattern.
///
- /// For example, the following statements all have the same pattern:
+ /// For example, the following statements all have the same pattern and this
+ /// function would return zero:
///
/// if (a < b) return a; return b;
/// if (x < y) return x; return y;
/// if (u2 < u1) return u2; return u1;
///
- /// but the following statement has a different pattern (note the changed
- /// variables in the return statements).
+ /// But the following statement has a different pattern (note the changed
+ /// variables in the return statements) and would have two differences when
+ /// compared with one of the statements above.
///
/// if (a < b) return b; return a;
///
/// This function should only be called if the related statements of the given
/// pattern and the statements of this objects are clones of each other.
- bool comparePattern(const VariablePattern &Other) {
+ unsigned getPatternDifferences(
+ const VariablePattern &Other,
+ CloneDetector::SuspiciousClonePair *FirstMismatch = nullptr) {
+ unsigned NumberOfDifferences = 0;
+
assert(Other.Occurences.size() == Occurences.size());
for (unsigned i = 0; i < Occurences.size(); ++i) {
- if (Occurences[i].KindID != Other.Occurences[i].KindID) {
- return false;
+ auto ThisOccurence = Occurences[i];
+ auto OtherOccurence = Other.Occurences[i];
+ if (ThisOccurence.KindID != OtherOccurence.KindID) {
+ ++NumberOfDifferences;
+
+ // If FirstMismatch is not a nullptr, we need to store information about
+ // the first difference between the two patterns.
+ if (FirstMismatch == nullptr)
+ continue;
+
+ // Only proceed if we just found the first difference as we only store
+ // information about the first difference.
+ if (NumberOfDifferences != 1)
+ continue;
+
+ const VarDecl *FirstSuggestion = nullptr;
+ // If there is a variable available in the list of referenced variables
+ // which wouldn't break the pattern if it is used in place of the
+ // current variable, we provide this variable as the suggested fix.
+ if (OtherOccurence.KindID < Variables.size())
+ FirstSuggestion = Variables[OtherOccurence.KindID];
+
+ // Store information about the first clone.
+ FirstMismatch->FirstClone =
+ CloneDetector::SuspiciousClonePair::SuspiciousClone(
+ Variables[ThisOccurence.KindID], ThisOccurence.Location,
+ FirstSuggestion);
+
+ // Same as above but with the other clone. We do this for both clones as
+ // we don't know which clone is the one containing the unintended
+ // pattern error.
+ const VarDecl *SecondSuggestion = nullptr;
+ if (ThisOccurence.KindID < Other.Variables.size())
+ SecondSuggestion = Other.Variables[ThisOccurence.KindID];
+
+ // Store information about the second clone.
+ FirstMismatch->SecondClone =
+ CloneDetector::SuspiciousClonePair::SuspiciousClone(
+ Variables[ThisOccurence.KindID], OtherOccurence.Location,
+ SecondSuggestion);
+
+ // SuspiciousClonePair guarantees that the first clone always has a
+ // suggested variable associated with it. As we know that one of the two
+ // clones in the pair always has suggestion, we swap the two clones
+ // in case the first clone has no suggested variable which means that
+ // the second clone has a suggested variable and should be first.
+ if (!FirstMismatch->FirstClone.Suggestion)
+ std::swap(FirstMismatch->FirstClone, FirstMismatch->SecondClone);
+
+ // This ensures that we always have at least one suggestion in a pair.
+ assert(FirstMismatch->FirstClone.Suggestion);
}
}
- return true;
+
+ return NumberOfDifferences;
}
};
}
@@ -526,7 +588,8 @@
// and assign them to our new clone group.
auto I = UnassignedSequences.begin(), E = UnassignedSequences.end();
while (I != E) {
- if (VariablePattern(*I).comparePattern(PrototypeFeatures)) {
+
+ if (VariablePattern(*I).getPatternDifferences(PrototypeFeatures) == 0) {
FilteredGroup.Sequences.push_back(*I);
I = UnassignedSequences.erase(I);
continue;
@@ -543,11 +606,15 @@
}
void CloneDetector::findClones(std::vector<CloneGroup> &Result,
- unsigned MinGroupComplexity) {
+ unsigned MinGroupComplexity,
+ bool CheckPatterns) {
// Add every valid clone group that fulfills the complexity requirement.
for (const CloneGroup &Group : CloneGroups) {
if (Group.isValid() && Group.Complexity >= MinGroupComplexity) {
- createCloneGroups(Result, Group);
+ if (CheckPatterns)
+ createCloneGroups(Result, Group);
+ else
+ Result.push_back(Group);
}
}
@@ -577,3 +644,37 @@
Result.erase(Result.begin() + *I);
}
}
+
+void CloneDetector::findSuspiciousClones(
+ std::vector<CloneDetector::SuspiciousClonePair> &Result,
+ unsigned MinGroupComplexity) {
+ std::vector<CloneGroup> Clones;
+ // Reuse the normal search for clones but specify that the clone groups don't
+ // need to have a common referenced variable pattern so that we can manually
+ // search for the kind of pattern errors this function is supposed to find.
+ findClones(Clones, MinGroupComplexity, false);
+
+ for (const CloneGroup &Group : Clones) {
+ for (unsigned i = 0; i < Group.Sequences.size(); ++i) {
+ VariablePattern PatternA(Group.Sequences[i]);
+
+ for (unsigned j = i + 1; j < Group.Sequences.size(); ++j) {
+ VariablePattern PatternB(Group.Sequences[j]);
+
+ CloneDetector::SuspiciousClonePair ClonePair;
+ // For now, we only report clones which break the variable pattern just
+ // once because multiple differences in a pattern are an indicator that
+ // those differences are maybe intended (e.g. because it's actually
+ // a different algorithm).
+ // TODO: In very big clones even multiple variables can be unintended,
+ // so replacing this number with a percentage could better handle such
+ // cases. On the other hand it could increase the false-positive rate
+ // for all clones if the percentage is too high.
+ if (PatternA.getPatternDifferences(PatternB, &ClonePair) == 1) {
+ Result.push_back(ClonePair);
+ break;
+ }
+ }
+ }
+ }
+}
Index: include/clang/Analysis/CloneDetection.h
===================================================================
--- include/clang/Analysis/CloneDetection.h
+++ include/clang/Analysis/CloneDetection.h
@@ -24,6 +24,7 @@
class Stmt;
class Decl;
+class VarDecl;
class ASTContext;
class CompoundStmt;
@@ -222,7 +223,43 @@
/// that were identified to be clones of each other.
/// \param MinGroupComplexity Only return clones which have at least this
/// complexity value.
- void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity);
+ /// \param CheckPatterns Returns only clone groups in which the referenced
+ /// variables follow the same pattern.
+ void findClones(std::vector<CloneGroup> &Result, unsigned MinGroupComplexity,
+ bool CheckPatterns = true);
+
+ /// \brief Describes two clones that reference their variables in a different
+ /// pattern which could indicate a programming error.
+ struct SuspiciousClonePair {
+ /// \brief Utility class holding the relevant information about a single
+ /// clone in this pair.
+ struct SuspiciousClone {
+ /// The variable which referencing in this clone was against the pattern.
+ const VarDecl *Variable;
+ /// Where the variable was referenced.
+ SourceRange Location;
+ /// The variable that should have been referenced to follow the pattern.
+ /// If Suggestion is a nullptr then it's not possible to fix the pattern
+ /// by referencing a different variable in this clone.
+ const VarDecl *Suggestion;
+ SuspiciousClone(const VarDecl *Variable, SourceRange Location,
+ const VarDecl *Suggestion)
+ : Variable(Variable), Location(Location), Suggestion(Suggestion) {}
+ SuspiciousClone() {}
+ };
+ /// The first clone in the pair which always has a suggested variable.
+ SuspiciousClone FirstClone;
+ /// This other clone in the pair which can have a suggested variable.
+ SuspiciousClone SecondClone;
+ };
+
+ /// \brief Searches the provided statements for pairs of clones that don't
+ /// follow the same pattern when referencing variables.
+ /// \param Result Output parameter that will contain the clone pairs.
+ /// \param MinGroupComplexity Only clone pairs in which the clones have at
+ /// least this complexity value.
+ void findSuspiciousClones(std::vector<SuspiciousClonePair> &Result,
+ unsigned MinGroupComplexity);
private:
/// Stores all found clone groups including invalid groups with only a single
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits