[PATCH] D20811: [analyzer] Model some library functions
This revision was automatically updated to reflect the committed changes. Closed by commit rL284960: [analyzer] Add StdLibraryFunctions checker. (authored by dergachev). Changed prior to commit: https://reviews.llvm.org/D20811?vs=75446&id=75560#toc Repository: rL LLVM https://reviews.llvm.org/D20811 Files: cfe/trunk/include/clang/StaticAnalyzer/Checkers/Checkers.td cfe/trunk/lib/StaticAnalyzer/Checkers/CMakeLists.txt cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp cfe/trunk/test/Analysis/std-c-library-functions.c cfe/trunk/test/Analysis/std-c-library-functions.cpp Index: cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp === --- cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ cfe/trunk/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -0,0 +1,943 @@ +//=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +// This checker improves modeling of a few simple library functions. +// It does not generate warnings. +// +// This checker provides a specification format - `FunctionSummaryTy' - and +// contains descriptions of some library functions in this format. Each +// specification contains a list of branches for splitting the program state +// upon call, and range constraints on argument and return-value symbols that +// are satisfied on each branch. This spec can be expanded to include more +// items, like external effects of the function. +// +// The main difference between this approach and the body farms technique is +// in more explicit control over how many branches are produced. For example, +// consider standard C function `ispunct(int x)', which returns a non-zero value +// iff `x' is a punctuation character, that is, when `x' is in range +// ['!', '/'] [':', '@'] U ['[', '\`'] U ['{', '~']. +// `FunctionSummaryTy' provides only two branches for this function. However, +// any attempt to describe this range with if-statements in the body farm +// would result in many more branches. Because each branch needs to be analyzed +// independently, this significantly reduces performance. Additionally, +// once we consider a branch on which `x' is in range, say, ['!', '/'], +// we assume that such branch is an important separate path through the program, +// which may lead to false positives because considering this particular path +// was not consciously intended, and therefore it might have been unreachable. +// +// This checker uses eval::Call for modeling "pure" functions, for which +// their `FunctionSummaryTy' is a precise model. This avoids unnecessary +// invalidation passes. Conflicts with other checkers are unlikely because +// if the function has no other effects, other checkers would probably never +// want to improve upon the modeling done by this checker. +// +// Non-"pure" functions, for which only partial improvement over the default +// behavior is expected, are modeled via check::PostCall, non-intrusively. +// +// The following standard C functions are currently supported: +// +// fgetc getline isdigit isupper +// fread isalnum isgraph isxdigit +// fwrite isalpha islower read +// getc isascii isprint write +// getcharisblank ispunct +// getdelim iscntrl isspace +// +//===--===// + +#include "ClangSACheckers.h" +#include "clang/StaticAnalyzer/Core/Checker.h" +#include "clang/StaticAnalyzer/Core/CheckerManager.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" + +using namespace clang; +using namespace clang::ento; + +namespace { +class StdLibraryFunctionsChecker : public Checker { + /// Below is a series of typedefs necessary to define function specs. + /// We avoid nesting types here because each additional qualifier + /// would need to be repeated in every function spec. + struct FunctionSummaryTy; + + /// Specify how much the analyzer engine should entrust modeling this function + /// to us. If he doesn't, he performs additional invalidations. + enum InvalidationKindTy { NoEvalCall, EvalCallAsPure }; + + /// A pair of ValueRangeKindTy and IntRangeVectorTy would describe a range + /// imposed on a particular argument or return value symbol. + /// + /// Given a range, should the argument stay inside or outside this range? + /// The special `ComparesToArgument' value indicates that we should + /// impose a constraint that involves other argument or return value symbols. + enum ValueRangeKindTy { OutOfRange, WithinRang
[PATCH] D20811: [analyzer] Model some library functions
dcoughlin accepted this revision. dcoughlin added a comment. This looks great! Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:694 +INVALIDATION_APPROACH(EvalCallAsPure)) + CASE // Is certainly uppercase. +ARGUMENT_CONDITION(ARG_NO(0), WithinRange) I think think this comment should say 'lowercase'. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:702 + END_CASE + CASE // Is ascii but not uppercase. +ARGUMENT_CONDITION(ARG_NO(0), WithinRange) Same here. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20811: [analyzer] Model some library functions
NoQ updated this revision to Diff 75446. NoQ added a comment. Herald added a subscriber: modocache. Update the domain-specific language for function specs/summaries. https://reviews.llvm.org/D20811 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp test/Analysis/std-c-library-functions.c test/Analysis/std-c-library-functions.cpp Index: test/Analysis/std-c-library-functions.cpp === --- /dev/null +++ test/Analysis/std-c-library-functions.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdCLibraryFunctions,debug.ExprInspection -verify %s + +// Test that we don't model functions with broken prototypes. +// Because they probably work differently as well. +// +// This test lives in a separate file because we wanted to test all functions +// in the .c file, however in C there are no overloads. + +void clang_analyzer_eval(bool); +bool isalpha(char); + +void test() { + clang_analyzer_eval(isalpha('A')); // no-crash // expected-warning{{UNKNOWN}} +} Index: test/Analysis/std-c-library-functions.c === --- /dev/null +++ test/Analysis/std-c-library-functions.c @@ -0,0 +1,184 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdCLibraryFunctions,debug.ExprInspection -verify %s + +void clang_analyzer_eval(int); + +int glob; + +typedef struct FILE FILE; +#define EOF -1 + +int getc(FILE *); +void test_getc(FILE *fp) { + int x; + while ((x = getc(fp)) != EOF) { +clang_analyzer_eval(x > 255); // expected-warning{{FALSE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } +} + +int fgetc(FILE *); +void test_fgets(FILE *fp) { + clang_analyzer_eval(fgetc(fp) < 256); // expected-warning{{TRUE}} + clang_analyzer_eval(fgetc(fp) >= 0); // expected-warning{{UNKNOWN}} +} + + +typedef unsigned long size_t; +typedef signed long ssize_t; +ssize_t read(int, void *, size_t); +ssize_t write(int, const void *, size_t); +void test_read_write(int fd, char *buf) { + glob = 1; + ssize_t x = write(fd, buf, 10); + clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}} + if (x >= 0) { +clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} +ssize_t y = read(fd, &glob, sizeof(glob)); +if (y >= 0) { + clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{TRUE}} +} else { + // -1 overflows on promotion! + clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{FALSE}} +} + } else { +clang_analyzer_eval(x == -1); // expected-warning{{TRUE}} + } +} + +size_t fread(void *, size_t, size_t, FILE *); +size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict); +void test_fread_fwrite(FILE *fp, int *buf) { + size_t x = fwrite(buf, sizeof(int), 10, fp); + clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} + size_t y = fread(buf, sizeof(int), 10, fp); + clang_analyzer_eval(y <= 10); // expected-warning{{TRUE}} + size_t z = fwrite(buf, sizeof(int), y, fp); + // FIXME: should be TRUE once symbol-symbol constraint support is improved. + clang_analyzer_eval(z <= y); // expected-warning{{UNKNOWN}} +} + +ssize_t getline(char **, size_t *, FILE *); +void test_getline(FILE *fp) { + char *line = 0; + size_t n = 0; + ssize_t len; + while ((len = getline(&line, &n, fp)) != -1) { +clang_analyzer_eval(len == 0); // expected-warning{{FALSE}} + } +} + +int isascii(int); +void test_isascii(int x) { + clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}} + clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}} + if (isascii(x)) { +clang_analyzer_eval(x < 128); // expected-warning{{TRUE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } else { +if (x > 42) + clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}} +else + clang_analyzer_eval(x < 0); // expected-warning{{TRUE}} + } + glob = 1; + isascii('a'); + clang_analyzer_eval(glob); // expected-warning{{TRUE}} +} + +int islower(int); +void test_islower(int x) { + clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}} + clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}} + if (islower(x)) +clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}} +} + +int getchar(void); +void test_getchar() { + int x = getchar(); + if (x == EOF) +return; + clang_analyzer_eval(x < 0); // expected-warning{{FALSE}} + clang_analyzer_eval(x < 256); // expected-warning{{TRUE}} +} + +int isalpha(int); +void test_isalpha() { + clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}} + clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}} +} + +int isalnum(int); +void test_alnum() { + clang_analyzer_eval(isalnum('1')); // expected-warning{{TRUE}} + clang_analyzer_eval(
[PATCH] D20811: [analyzer] Model some library functions
dcoughlin added a comment. In https://reviews.llvm.org/D20811#575521, @NoQ wrote: > I thought to give it a pause to take a fresh look at how to arrange the > macro-hints in the summaries. > > Maybe something like that: > > CASE > ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) > RANGE('0', '9') > RANGE('A', 'Z') > RANGE('a', 'z') > RANGE(128, 255) > END_ARGUMENT_CONDITION > RETURN_VALUE_CONDITION(WithinRange) > SINGLE_VALUE(0) > END_RETURN_VALUE_CONDITION > END_CASE > Looks great to me! https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20811: [analyzer] Model some library functions
NoQ marked 9 inline comments as done. NoQ added a comment. I thought to give it a pause to take a fresh look at how to arrange the macro-hints in the summaries. Maybe something like that: CASE ARGUMENT_CONDITION(ARG_NO(0), OutOfRange) RANGE('0', '9') RANGE('A', 'Z') RANGE('a', 'z') RANGE(128, 255) END_ARGUMENT_CONDITION RETURN_VALUE_CONDITION(WithinRange) SINGLE_VALUE(0) END_RETURN_VALUE_CONDITION END_CASE Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:547 +RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } dcoughlin wrote: > NoQ wrote: > > dcoughlin wrote: > > > Is it ever the case that this final 'RANGE" constrains anything other > > > than the return value? If not, can 'RET_VAL' be elided? > > Some summaries only have pre-conditions: "for this argument constraint, any > > return value is possible". We should also be able to support void > > functions, which have no return values. > What does a postcondition on a void function mean in this context? Can you > refer to argument values? Such as "If the the function terminates then it > must have been the case that the first argument was in the rangy x..z even > though we didn't know that going in? Is this useful? No, i don't think this is useful. There are just timeless immutable symbols about which we learn something new on every branch. If the function doesn't terminate on certain pre-conditons, then we can model it by never mentioning these pre-conditions in any of the branches (we don't use this trick anywhere yet - all functions listed here shall terminate in all cases). This would have been useful if we start referring to the heap shape (eg. "if the value behind the pointer passed as second argument to the call was in range [1,10] before the call, then it would be equal to 42 after the call"), but we don't do that yet. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20811: [analyzer] Model some library functions
zaks.anna added a comment. Ping? Is there something blocking progress here? This functionality is very useful and almost done. Thanks! https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
dcoughlin added a comment. In https://reviews.llvm.org/D20811#544981, @NoQ wrote: > Hmm, what about > > CONSTRAIN > ARGUMENT_VALUE(0, WithinRange) > RANGE('0', '9') > RANGE('A', 'Z') > RANGE('a', 'z') > END_ARGUMENT_VALUE > RETURN_VALUE(OutOfRange) > VALUE(0) > END_RETURN_VALUE > END_CONSTRAIN > > > ? "CONSTRAIN" is a verb. What is the direct object here? It seems to me that the thing being constrained is the return value, so it seems odd to have 'CONSTRAIN' around the conditions on the arguments. > Something i don't like here is that the word "value" is overloaded. Maybe > rename the inner `VALUE` back to `POINT`? I don't think the geometric metaphor of 'POINT' makes sense here, especially with 'RANGE' (which I think is very good). What is the analog of a range that has only a single element? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:547 @@ +546,3 @@ +RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } NoQ wrote: > dcoughlin wrote: > > Is it ever the case that this final 'RANGE" constrains anything other than > > the return value? If not, can 'RET_VAL' be elided? > Some summaries only have pre-conditions: "for this argument constraint, any > return value is possible". We should also be able to support void functions, > which have no return values. What does a postcondition on a void function mean in this context? Can you refer to argument values? Such as "If the the function terminates then it must have been the case that the first argument was in the rangy x..z even though we didn't know that going in? Is this useful? https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added a comment. In https://reviews.llvm.org/D20811#544927, @dcoughlin wrote: > That said, now that I look at it with 'POSTCONDITION' alone I don't think it > is clear that the provided value describes the return value. What do you > think about renaming it 'RETURN_VALUE'? Or adding back the RET_VAL I asked > you about removing before? :-) Hmm, what about CONSTRAIN ARGUMENT_VALUE(0, WithinRange) RANGE('0', '9') RANGE('A', 'Z') RANGE('a', 'z') END_ARGUMENT_VALUE RETURN_VALUE(OutOfRange) VALUE(0) END_RETURN_VALUE END_CONSTRAIN ? Something i don't like here is that the word "value" is overloaded. Maybe rename the inner `VALUE` back to `POINT`? In https://reviews.llvm.org/D20811#544927, @dcoughlin wrote: > Also: do you think CONDITION_KIND is needed? in PRECONDITION? Or can the bare > kind be used like in POSTCONDITION? I agree that it's ok to use the bare kind, because it's quite self-explanatory. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
dcoughlin added a comment. I think this is much clearer! That said, now that I look at it with 'POSTCONDITION' alone I don't think it is clear that the provided value describes the return value. What do you think about renaming it 'RETURN_VALUE'? Or adding back the RET_VAL I asked you about removing before? :-) Also: do you think CONDITION_KIND is needed? in PRECONDITION? Or can the bare kind be used like in POSTCONDITION? https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added a comment. In https://reviews.llvm.org/D20811#544250, @dcoughlin wrote: > I think a good rule of thumb for readability is: suppose you are a maintainer > and need to add a summary for a new function. Can you copy the the summary > for an existing function and figure out what each component means so you can > change it for the new function? Seems i've written too many summaries to reliably use this rule :) Could you have a look at another attempt?: SUMMARY(isalnum, ARGUMENT_TYPES { IntTy }, RETURN_TYPE(IntTy), INVALIDATION_APPROACH(EvalCallAsPure)) CASE // Boils down to isupper() or islower() or isdigit() PRE_CONDITION(ARG_NO(0), CONDITION_KIND(WithinRange)) RANGE('0', '9') RANGE('A', 'Z') RANGE('a', 'z') END_PRE_CONDITION POST_CONDITION(OutOfRange) VALUE(0) END_POST_CONDITION END_CASE CASE // The locale-specific range. PRE_CONDITION(ARG_NO(0), CONDITION_KIND(WithinRange)) RANGE(128, 255) END_PRE_CONDITION // No post-condition. We are completely unaware of // locale-specific return values. END_CASE CASE PRE_CONDITION(ARG_NO(0), CONDITION_KIND(OutOfRange)) RANGE('0', '9') RANGE('A', 'Z') RANGE('a', 'z') RANGE(128, 255) END_PRE_CONDITION POST_CONDITION(WithinRange) VALUE(0) END_POST_CONDITION END_CASE END_SUMMARY Comment at: include/clang/StaticAnalyzer/Checkers/Checkers.td:419 @@ -418,1 +418,3 @@ +def StdLibraryFunctionsChecker : Checker<"StdLibraryFunctions">, + HelpText<"Improve modeling of standard library functions">, dcoughlin wrote: > I know you and Gábor already discussed this -- but shouldn't this be > CStdLibraryFunctionsChecker or 'StdCLibraryFunctionsChecker'? Or is is your > intent that both C and C++ standard libraries would be modeled by this > checker? Hmm, i just realized what you guys were talking about :) The same checker cpp file and even the same checker object should probably produce different checker list entries here which would go into separate packages (cplusplus for C++ library functions, etc.). We could even split the specifications into different files, but the checker object would still be the same, defined in the same file. Will do. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:537 @@ +536,3 @@ + SPEC_DATA { +ARGUMENT_TYPES { IntTy }, +RETURN_TYPE(IntTy), dcoughlin wrote: > The argument and return types seem like more a property of the function than > than the summary. Why are they here and not with the function name? Because this is where C++ initializer list syntax forces them to be. Hiding this detail is, as far as i see, only possible with the means of BEGIN_.../END_... macros (which isn't a big deal i think). Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:547 @@ +546,3 @@ +RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } dcoughlin wrote: > Is it ever the case that this final 'RANGE" constrains anything other than > the return value? If not, can 'RET_VAL' be elided? Some summaries only have pre-conditions: "for this argument constraint, any return value is possible". We should also be able to support void functions, which have no return values. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
dcoughlin added a comment. Thanks for adding the macros. I've provided some feedback inline. I think a good rule of thumb for readability is: suppose you are a maintainer and need to add a summary for a new function. Can you copy the the summary for an existing function and figure out what each component means so you can change it for the new function? Comment at: include/clang/StaticAnalyzer/Checkers/Checkers.td:419 @@ -418,1 +418,3 @@ +def StdLibraryFunctionsChecker : Checker<"StdLibraryFunctions">, + HelpText<"Improve modeling of standard library functions">, I know you and Gábor already discussed this -- but shouldn't this be CStdLibraryFunctionsChecker or 'StdCLibraryFunctionsChecker'? Or is is your intent that both C and C++ standard libraries would be modeled by this checker? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:11 @@ +10,3 @@ +// This checker improves modeling of a few simple library functions. +// It does not throw warnings. +// "throw" --> "generate" Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:534 @@ +533,3 @@ +// The isascii() family of functions. +SPEC { + FOR_FUNCTION(isalnum), Is "specification" the right term here? Or is this really a "summary"? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:536 @@ +535,3 @@ + FOR_FUNCTION(isalnum), + SPEC_DATA { +ARGUMENT_TYPES { IntTy }, 'SPEC_DATA' doesn't seem to add much in terms of readability. Is it needed? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:537 @@ +536,3 @@ + SPEC_DATA { +ARGUMENT_TYPES { IntTy }, +RETURN_TYPE(IntTy), The argument and return types seem like more a property of the function than than the summary. Why are they here and not with the function name? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:540 @@ +539,3 @@ +INVALIDATION_APPROACH(EvalCallAsPure), +BRANCHES { + BRANCH { // Boils down to isupper() or islower() or isdigit() "Cases" seems more appropriate than "branches" (branching is an implementation detail). Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:542 @@ +541,3 @@ + BRANCH { // Boils down to isupper() or islower() or isdigit() +RANGE { + ARG_NO(0), RANGE_KIND(WithinRange), If I understand correctly, the first "argument" to branch describes the constraints on the function arguments and the second (if present) describes the resulting constraint on the return value when the argument constraint holds. Is there a way to make this apparent in the spelling of the summary? As a straw proposal, what about renaming the first 'RANGE' to 'ARGUMENT_CONSTRAINT' and the second the 'RETURN_VALUE_CONSTRAINT'? Or, more jargony, "PRECONDITION" and "POSTCONDITION"? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:547 @@ +546,3 @@ +RANGE { + RET_VAL, RANGE_KIND(OutOfRange), + SET { POINT(0) } Is it ever the case that this final 'RANGE" constrains anything other than the return value? If not, can 'RET_VAL' be elided? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:554 @@ +553,3 @@ + ARG_NO(0), RANGE_KIND(WithinRange), + SET { SEG(128, 255) } +} What is the motivation behind the use of geometric terms here (i.e., "SEG", "POINT")? Why not "INTERVAL" and "EXACT_VALUE"? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:560 @@ +559,3 @@ + ARG_NO(0), RANGE_KIND(OutOfRange), + SET { SEG('0', '9') U SEG('A', 'Z') + U SEG('a', 'z') U SEG(128, 255)} Would you be opposed to 'UNION' instead of 'U'? https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ updated this revision to Diff 71510. NoQ marked an inline comment as done. NoQ added a comment. Herald added subscribers: mgorny, beanz. Added a huge amount of macros in order to improve readability of function specs. Other inline comments should have been addressed before. https://reviews.llvm.org/D20811 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp test/Analysis/std-library-functions.c test/Analysis/std-library-functions.cpp Index: test/Analysis/std-library-functions.cpp === --- /dev/null +++ test/Analysis/std-library-functions.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s + +// Test that we don't model functions with broken prototypes. +// Because they probably work differently as well. +// +// This test lives in a separate file because we wanted to test all functions +// in the .c file, however in C there are no overloads. + +void clang_analyzer_eval(bool); +bool isalpha(char); + +void test() { + clang_analyzer_eval(isalpha('A')); // no-crash // expected-warning{{UNKNOWN}} +} Index: test/Analysis/std-library-functions.c === --- /dev/null +++ test/Analysis/std-library-functions.c @@ -0,0 +1,184 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s + +void clang_analyzer_eval(int); + +int glob; + +typedef struct FILE FILE; +#define EOF -1 + +int getc(FILE *); +void test_getc(FILE *fp) { + int x; + while ((x = getc(fp)) != EOF) { +clang_analyzer_eval(x > 255); // expected-warning{{FALSE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } +} + +int fgetc(FILE *); +void test_fgets(FILE *fp) { + clang_analyzer_eval(fgetc(fp) < 256); // expected-warning{{TRUE}} + clang_analyzer_eval(fgetc(fp) >= 0); // expected-warning{{UNKNOWN}} +} + + +typedef unsigned long size_t; +typedef signed long ssize_t; +ssize_t read(int, void *, size_t); +ssize_t write(int, const void *, size_t); +void test_read_write(int fd, char *buf) { + glob = 1; + ssize_t x = write(fd, buf, 10); + clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}} + if (x >= 0) { +clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} +ssize_t y = read(fd, &glob, sizeof(glob)); +if (y >= 0) { + clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{TRUE}} +} else { + // -1 overflows on promotion! + clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{FALSE}} +} + } else { +clang_analyzer_eval(x == -1); // expected-warning{{TRUE}} + } +} + +size_t fread(void *, size_t, size_t, FILE *); +size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict); +void test_fread_fwrite(FILE *fp, int *buf) { + size_t x = fwrite(buf, sizeof(int), 10, fp); + clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} + size_t y = fread(buf, sizeof(int), 10, fp); + clang_analyzer_eval(y <= 10); // expected-warning{{TRUE}} + size_t z = fwrite(buf, sizeof(int), y, fp); + // FIXME: should be TRUE once symbol-symbol constraint support is improved. + clang_analyzer_eval(z <= y); // expected-warning{{UNKNOWN}} +} + +ssize_t getline(char **, size_t *, FILE *); +void test_getline(FILE *fp) { + char *line = 0; + size_t n = 0; + ssize_t len; + while ((len = getline(&line, &n, fp)) != -1) { +clang_analyzer_eval(len == 0); // expected-warning{{FALSE}} + } +} + +int isascii(int); +void test_isascii(int x) { + clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}} + clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}} + if (isascii(x)) { +clang_analyzer_eval(x < 128); // expected-warning{{TRUE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } else { +if (x > 42) + clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}} +else + clang_analyzer_eval(x < 0); // expected-warning{{TRUE}} + } + glob = 1; + isascii('a'); + clang_analyzer_eval(glob); // expected-warning{{TRUE}} +} + +int islower(int); +void test_islower(int x) { + clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}} + clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}} + if (islower(x)) +clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}} +} + +int getchar(void); +void test_getchar() { + int x = getchar(); + if (x == EOF) +return; + clang_analyzer_eval(x < 0); // expected-warning{{FALSE}} + clang_analyzer_eval(x < 256); // expected-warning{{TRUE}} +} + +int isalpha(int); +void test_isalpha() { + clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}} + clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}} +} + +int isalnum(int); +void test_
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added a comment. > Is it really a problem if the checker comments are part of the Doxygen > documentation? Of course not :) I've been mostly thinking about the benefits of the anonymous namespace itself (cleaner global scope, no name collisions, but even these benefits are extremely minor). https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
Alexander_Droste added a comment. > It has been originally written as a large set of files. If you feel strongly > about it, we could merge it into a single file. That makes sense to me. > @Alexander_Droste, what do you think? Hi, I would still strongly prefer to keep them in separate files if possible. One of the headers (`MPIFunctionClassifier.hpp`) also got moved to `include/clang/StaticAnalyzer/Checkers`, as it is needed by some MPI clang-tidy checks. Is it really a problem if the checker comments are part of the Doxygen documentation? Further, I think that the separation of concerns in form of distinct files might be valuable for people being new to the Clang Static Analyzer framework, as the grouping of functionality is visible on a higher level of abstraction. Regardless, I would of course accept if you prefer to merge the files into a single one, excluding the `MPIFunctionClassifier.hpp` header. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
zaks.anna added inline comments. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:206 @@ +205,3 @@ +: Call.getArgExpr(ArgNo)->getType().getCanonicalType(); + } + static QualType getArgType(const CallExpr *CE, ArgNoTy ArgNo) { Separate commit is fine. I'd provide both APIs in CallEvent. Comment at: test/Analysis/std-library-functions.c:4 @@ +3,3 @@ +void clang_analyzer_eval(int); + +int glob; Ok. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
zaks.anna added a subscriber: Alexander_Droste. zaks.anna added a comment. > Even though there are some doxygen-style comments in the checkers, i’ve never > seen doxygen actually generate any docs for checker classes. > Are they useful for IDE quick-hints only? I think it's useful to have consistent documentation format. In https://reviews.llvm.org/D20811#497585, @NoQ wrote: > Answering myself: Hmm, so the only reason why MPI checker class appears in > doxygen > (http://clang.llvm.org/doxygen/classclang_1_1ento_1_1mpi_1_1MPIChecker.html) > is because this class is not in anonymous namespace (as far as i understand, > they needed to be multi-file for some reason). CheckerDocumentation says that > every checker must be wrapped in anonymous namespace, except > CheckerDocumentationChecker :) > > I don’t really see a good reason for the library functions checker to be > moved out of anonymous namespace or deserve a doxygen page - after all, it’s > all in one file, and the docs are right in front of the reader’s eyes anyway. > But maybe if this checker expands enough, we could expose its data structures > into public use, and then they'd be worth documenting :) It has been originally written as a large set of files. If you feel strongly about it, we could merge it into a single file. That makes sense to me. @Alexander_Droste, what do you think? https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added inline comments. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:509 @@ +508,3 @@ + //} + // } + //} dcoughlin wrote: > I disagree about compactness being valuable here. I think it is more > important to intrinsically document the spec. These will be written once and > read frequently. When they are written, they will copied from a previous > example -- probably by someone who is not familiar with the code or the spec > format. > > Another possibility (not sure if it is the right one here) is to use macro > tricks to define a simple DSL like Kulpreet did in the > LocalizationChecker.cpp. > These will be written once and read frequently. If only it was so :)) Hmm. What do you think of the following format? Macros mostly expand to empty or (argument), but it should be more readable than the `/*`...`*/` noise. ``` SPEC { FOR_FUNCTION("isalnum"), SPEC_DATA { ARGUMENT_TYPES { IntTy }, RETURN_TYPE(IntTy), INVALIDATION_APPROACH(EvalCallAsPure), BRANCHES { BRANCH { // Boils down to isupper() or islower() or isdigit() RANGE { ARG_NO(0), RANGE_KIND(WithinRange), SET { SEG('0', '9') U SEG('A', 'Z') U SEG('a', 'z') } }, RANGE { RET_VAL, RANGE_KIND(OutOfRange), SET { SEG(0, 0) } } }, BRANCH { // The locale-specific branch. RANGE { ARG_NO(0), RANGE_KIND(WithinRange), SET { SEG(128, 255) } } }, BRANCH { // Other. RANGE { ARG_NO(0), RANGE_KIND(OutOfRange), SET { SEG('0', '9') U SEG('A', 'Z') U SEG('a', 'z') U SEG(128, 255)} }, RANGE { RET_VAL, RANGE_KIND(WithinRange), SET { SEG(0, 0) } } } } } }, ``` https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
dcoughlin added inline comments. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:509 @@ +508,3 @@ + //} + // } + //} I disagree about compactness being valuable here. I think it is more important to intrinsically document the spec. These will be written once and read frequently. When they are written, they will copied from a previous example -- probably by someone who is not familiar with the code or the spec format. Another possibility (not sure if it is the right one here) is to use macro tricks to define a simple DSL like Kulpreet did in the LocalizationChecker.cpp. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added a comment. Answering myself: Hmm, so the only reason why MPI checker class appears in doxygen (http://clang.llvm.org/doxygen/classclang_1_1ento_1_1mpi_1_1MPIChecker.html) is because this class is not in anonymous namespace (as far as i understand, they needed to be multi-file for some reason). CheckerDocumentation says that every checker must be wrapped in anonymous namespace, except CheckerDocumentationChecker :) I don’t really see a good reason for the library functions checker to be moved out of anonymous namespace or deserve a doxygen page - after all, it’s all in one file, and the docs are right in front of the reader’s eyes anyway. But maybe if this checker expands enough, we could expose its data structures into public use, and then they'd be worth documenting :) https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added inline comments. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:192 @@ +191,3 @@ + }; + + // The map of all functions supported by the checker. It is initialized Even though there are some doxygen-style comments in the checkers, i’ve never seen doxygen actually generate any docs for checker classes. Are they useful for IDE quick-hints only? https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added inline comments. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:191 @@ +190,3 @@ +// impossible to verify this precisely, but at least +// this check avoids potential crashes. +bool matchesCall(const CallExpr *CE) const; zaks.anna wrote: > Do we need to talk about crashes when describing what this does? > Also, please, use oxygen throughout. Added more comments below. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:205 @@ +204,3 @@ + } + static QualType getArgType(const CallEvent &Call, ArgNoTy ArgNo) { +return ArgNo == Ret ? Call.getResultType().getCanonicalType() zaks.anna wrote: > We could either provide these APIs in CallEvent or at least have variants > that return canonical types. Maybe we already do some of that? Maybe a separate commit? There are quite a few checkers from which the `.getArgExpr(N)->getType()` pattern could be de-duplicated, but i don't think many of them are interested in canonical types. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:445 @@ +444,3 @@ + + const FunctionSpecTy &Spec = FSMI->second; + if (!Spec.matchesCall(CE)) zaks.anna wrote: > When can this go wrong? Are we checking if there is a mismatch between the > function declaration and the call expression? It is strange that > findFunctionSpec takes both of those. Couldn't you always get FunctionDecl > out of CallExpr? Callee decl is path-sensitive information because functions can be passed around by pointers, as mentioned in the comment at the beginning of the function. Expanded the comment, added a test. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:508 @@ +507,3 @@ + FunctionSpecMap = { +// The isascii() family of functions. +{ "isalnum", zaks.anna wrote: > you could also use /*NameOfTheField*/ convention to name the arguments if > that would make this map more readable. I think compactness is worth it here, and specs are pretty easy to remember, imho. Added an example to the first spec to see how it looks and make it easier for the reader to adapt and remember, but i'm not quite convinced that verbosity is worth it here. Comment at: test/Analysis/std-library-functions.c:3 @@ +2,3 @@ + +void clang_analyzer_eval(int); +int glob; zaks.anna wrote: > Why are you not testing all of the functions? I was too bored to generate tests for all branches of all functions (and if i auto-generate such tests, it defeats the purpose), but i added some of the more creative tests and covered at least some branches of all functions with them. https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ updated this revision to Diff 65369. NoQ marked 4 inline comments as done. https://reviews.llvm.org/D20811 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp test/Analysis/std-library-functions.c test/Analysis/std-library-functions.cpp Index: test/Analysis/std-library-functions.cpp === --- /dev/null +++ test/Analysis/std-library-functions.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s + +// Test that we don't model functions with broken prototypes. +// Because they probably work differently as well. +// +// This test lives in a separate file because we wanted to test all functions +// in the .c file, however in C there are no overloads. + +void clang_analyzer_eval(bool); +bool isalpha(char); + +void test() { + clang_analyzer_eval(isalpha('A')); // no-crash // expected-warning{{UNKNOWN}} +} Index: test/Analysis/std-library-functions.c === --- /dev/null +++ test/Analysis/std-library-functions.c @@ -0,0 +1,184 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s + +void clang_analyzer_eval(int); + +int glob; + +typedef struct FILE FILE; +#define EOF -1 + +int getc(FILE *); +void test_getc(FILE *fp) { + int x; + while ((x = getc(fp)) != EOF) { +clang_analyzer_eval(x > 255); // expected-warning{{FALSE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } +} + +int fgetc(FILE *); +void test_fgets(FILE *fp) { + clang_analyzer_eval(fgetc(fp) < 256); // expected-warning{{TRUE}} + clang_analyzer_eval(fgetc(fp) >= 0); // expected-warning{{UNKNOWN}} +} + + +typedef unsigned long size_t; +typedef signed long ssize_t; +ssize_t read(int, void *, size_t); +ssize_t write(int, const void *, size_t); +void test_read_write(int fd, char *buf) { + glob = 1; + ssize_t x = write(fd, buf, 10); + clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}} + if (x >= 0) { +clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} +ssize_t y = read(fd, &glob, sizeof(glob)); +if (y >= 0) { + clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{TRUE}} +} else { + // -1 overflows on promotion! + clang_analyzer_eval(y <= sizeof(glob)); // expected-warning{{FALSE}} +} + } else { +clang_analyzer_eval(x == -1); // expected-warning{{TRUE}} + } +} + +size_t fread(void *, size_t, size_t, FILE *); +size_t fwrite(const void *restrict, size_t, size_t, FILE *restrict); +void test_fread_fwrite(FILE *fp, int *buf) { + size_t x = fwrite(buf, sizeof(int), 10, fp); + clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} + size_t y = fread(buf, sizeof(int), 10, fp); + clang_analyzer_eval(y <= 10); // expected-warning{{TRUE}} + size_t z = fwrite(buf, sizeof(int), y, fp); + // FIXME: should be TRUE once symbol-symbol constraint support is improved. + clang_analyzer_eval(z <= y); // expected-warning{{UNKNOWN}} +} + +ssize_t getline(char **, size_t *, FILE *); +void test_getline(FILE *fp) { + char *line = 0; + size_t n = 0; + ssize_t len; + while ((len = getline(&line, &n, fp)) != -1) { +clang_analyzer_eval(len == 0); // expected-warning{{FALSE}} + } +} + +int isascii(int); +void test_isascii(int x) { + clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}} + clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}} + if (isascii(x)) { +clang_analyzer_eval(x < 128); // expected-warning{{TRUE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } else { +if (x > 42) + clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}} +else + clang_analyzer_eval(x < 0); // expected-warning{{TRUE}} + } + glob = 1; + isascii('a'); + clang_analyzer_eval(glob); // expected-warning{{TRUE}} +} + +int islower(int); +void test_islower(int x) { + clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}} + clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}} + if (islower(x)) +clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}} +} + +int getchar(void); +void test_getchar() { + int x = getchar(); + if (x == EOF) +return; + clang_analyzer_eval(x < 0); // expected-warning{{FALSE}} + clang_analyzer_eval(x < 256); // expected-warning{{TRUE}} +} + +int isalpha(int); +void test_isalpha() { + clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}} + clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}} +} + +int isalnum(int); +void test_alnum() { + clang_analyzer_eval(isalnum('1')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalnum(')')); // expected-warning{{FALSE}} +} + +int isblank(int); +void test_isblank() { + clang_ana
Re: [PATCH] D20811: [analyzer] Model some library functions
zaks.anna added inline comments. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:191 @@ +190,3 @@ +// impossible to verify this precisely, but at least +// this check avoids potential crashes. +bool matchesCall(const CallExpr *CE) const; Do we need to talk about crashes when describing what this does? Also, please, use oxygen throughout. Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:205 @@ +204,3 @@ + } + static QualType getArgType(const CallEvent &Call, ArgNoTy ArgNo) { +return ArgNo == Ret ? Call.getResultType().getCanonicalType() We could either provide these APIs in CallEvent or at least have variants that return canonical types. Maybe we already do some of that? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:445 @@ +444,3 @@ + + const FunctionSpecTy &Spec = FSMI->second; + if (!Spec.matchesCall(CE)) When can this go wrong? Are we checking if there is a mismatch between the function declaration and the call expression? It is strange that findFunctionSpec takes both of those. Couldn't you always get FunctionDecl out of CallExpr? Comment at: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp:508 @@ +507,3 @@ + FunctionSpecMap = { +// The isascii() family of functions. +{ "isalnum", you could also use /*NameOfTheField*/ convention to name the arguments if that would make this map more readable. Comment at: test/Analysis/std-library-functions.c:3 @@ +2,3 @@ + +void clang_analyzer_eval(int); +int glob; Why are you not testing all of the functions? https://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ updated this revision to Diff 65248. NoQ marked 11 inline comments as done. NoQ added a comment. Renamed the checker as **xazax.hun** suggested, added a lot more comments, done with inline comments :) https://reviews.llvm.org/D20811 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp test/Analysis/std-library-functions.c Index: test/Analysis/std-library-functions.c === --- test/Analysis/std-library-functions.c +++ test/Analysis/std-library-functions.c @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.StdLibraryFunctions,debug.ExprInspection -verify %s + +void clang_analyzer_eval(int); +int glob; + +typedef struct FILE FILE; +int getc(FILE *); +#define EOF -1 +void test_getc(FILE *fp) { + int x; + while ((x = getc(fp)) != EOF) { +clang_analyzer_eval(x > 255); // expected-warning{{FALSE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } +} + +typedef unsigned long size_t; +typedef signed long ssize_t; +ssize_t write(int, const void *, size_t); +void test_write(int fd, char *buf) { + glob = 1; + ssize_t x = write(fd, buf, 10); + clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}} + if (x >= 0) +clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} + else +clang_analyzer_eval(x == -1); // expected-warning{{TRUE}} +} + +size_t fread(void *, size_t, size_t, FILE *); +void test_fread(FILE *fp, int *buf) { + size_t x = fread(buf, sizeof(int), 10, fp); + clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} +} + +ssize_t getline(char **, size_t *, FILE *); +void test_getline(FILE *fp) { + char *line = 0; + size_t n = 0; + ssize_t len; + while ((len = getline(&line, &n, fp)) != -1) { +clang_analyzer_eval(len == 0); // expected-warning{{FALSE}} + } +} + +int isascii(int); +void test_isascii(int x) { + clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}} + clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}} + if (isascii(x)) { +clang_analyzer_eval(x < 128); // expected-warning{{TRUE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } else { +if (x > 42) + clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}} +else + clang_analyzer_eval(x < 0); // expected-warning{{TRUE}} + } + glob = 1; + isascii('a'); + clang_analyzer_eval(glob); // expected-warning{{TRUE}} +} + +int islower(int); +void test_islower(int x) { + clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}} + clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}} + if (islower(x)) +clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}} +} + +int getchar(void); +void test_getchar() { + int x = getchar(); + if (x == EOF) +return; + clang_analyzer_eval(x < 0); // expected-warning{{FALSE}} + clang_analyzer_eval(x < 256); // expected-warning{{TRUE}} +} + +int isalpha(int); +void test_isalpha() { + clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}} + clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}} +} + +int isalnum(int); +void test_alnum() { + clang_analyzer_eval(isalnum('1')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalnum(')')); // expected-warning{{FALSE}} +} + +int isblank(int); +void test_isblank() { + clang_analyzer_eval(isblank('\t')); // expected-warning{{TRUE}} + clang_analyzer_eval(isblank(' ')); // expected-warning{{TRUE}} + clang_analyzer_eval(isblank('\n')); // expected-warning{{FALSE}} +} + +int ispunct(int); +void test_ispunct(int x) { + clang_analyzer_eval(ispunct(' ')); // expected-warning{{FALSE}} + clang_analyzer_eval(ispunct(-1)); // expected-warning{{FALSE}} + clang_analyzer_eval(ispunct('#')); // expected-warning{{TRUE}} + clang_analyzer_eval(ispunct('_')); // expected-warning{{TRUE}} + if (ispunct(x)) +clang_analyzer_eval(x < 127); // expected-warning{{TRUE}} +} Index: lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp === --- lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -0,0 +1,831 @@ +//=== StdLibraryFunctionsChecker.cpp - Model standard functions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--===// +// +// This checker improves modeling of a few simple library functions. +// It does not throw warnings. +// +// This checker provides a specification format - `FunctionSpecTy' - and +// contains descriptions of some library functions in this format. Each +// specification con
Re: [PATCH] D20811: [analyzer] Model some library functions
zaks.anna added a comment. Thanks for the patch! Here are the comments, most of which are nits. Could you add the high level description of what we are doing somewhere or maybe just describe the meaning of FunctionSpec since it encodes how functions are modeled. Also, we should explain why we are not using BodyFarm somewhere in the comment. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:10 @@ +9,3 @@ +// +// This checker improves modeling of a few simple library functions. +// It does not throw warnings. Please, list the functions. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:27 @@ +26,3 @@ + static const uint32_t Ret = std::numeric_limits::max(); + enum ValueRangeKindTy { Outside, Inside, ComparesToArgument }; + enum InvalidationKindTy { Normal, Pure }; Naming looks odd: maybe "OutOfRange" and "WithinRange"? Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:33 @@ +32,3 @@ + private: +// ArgNo in CallExpr and CallEvent is defined is Unsigned, but +// obviously uint32_t should be enough for most practical purposes. nit: "is Unsigned" -> "as Unsigned" Please, use a typedef for the type as you are using it below in getArgType. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:83 @@ +82,3 @@ + static QualType getArgType(const FunctionSpec &Spec, uint32_t ArgNo) { +return ArgNo == Ret ? Spec.RetType : Spec.ArgTypes[ArgNo]; + } Are the types in FunctionSpec already canonical? If so, please, add a comment. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:178 @@ +177,3 @@ +if (auto N = V.getAs()) { + const IntRangeVector &R = VR.getRanges(); + size_t E = R.size(); nit: If you could factor these out into separate helper functions, it would be easier to read. Lot's of nesting.. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:219 @@ +218,3 @@ + +if (NewState) + C.addTransition(NewState); What happens when NewState == State? I guess addTransition would just not do anything, but maybe we should just make the intent explicit and not call it at all. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:245 @@ +244,3 @@ + } + case Normal: +return false; Replace "Normal" with a more descriptive name. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:273 @@ +272,3 @@ + + // Verify that function signature matches the spec in advance, + // so that we didn't have to roll back if anything goes wrong. Looks like you might want to have the checking code as a member on FunctionSpec. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:322 @@ +321,3 @@ + + // NOTE: The signature needs to have the correct number of arguments. + // NOTE: However, insert Irrelevant when the type is insignificant. Let's explain what we are doing next. For example, "Let's initialize the FunctionSpec for the functions we are modeling." Remove "NOTE:" Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:326 @@ +325,3 @@ + // is completely unknown, omit it from the respective range set. + // NOTE: Upon comparing to another argument, the other argument is casted + // to the current argument's type. This avoids proper promotion but not clear where this is used or if it is used in the initialization at all. Comment at: lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp:337 @@ +336,3 @@ + // { ranges list: + //{ argument index, inside or outside, {{from, to}, ...} }, + //{ argument index, compares to argument, {{how, which}} }, Is every item in the range set used to bifurcate the state? http://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
NoQ added a comment. Yeah, good point, the "Std" part should definitely appear in the name, not sure about the "C" thing, as we could probably expand this checker to model some simple C++ functions as well (and then we'd make a separate checker section to move from unix. to cplusplus. or something like that, not sure maybe we'd need to reside in core. anyway). http://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D20811: [analyzer] Model some library functions
xazax.hun added a comment. It is great to model more widely used functions! However I think the LibraryFunctionsChecker name might be a bit to broad, wouldn't something like StdCLibraryFunctions be more informative? http://reviews.llvm.org/D20811 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D20811: [analyzer] Model some library functions
NoQ created this revision. NoQ added reviewers: zaks.anna, dcoughlin. NoQ added a subscriber: cfe-commits. Herald added a subscriber: aemerson. I've put together a simple checker that throws no warnings, but models some library functions, which has already helped us to suppress some false positives in other checkers in our runs. For pure functions, i chose the old `evalCall()` approach instead of the body-farm approach because i wanted to produce less state splits. For example, this checker produce a single exploded graph branch for `ispunct()`'s non-zero branch, when its argument is in range ['!', '/'] U [':', '@'] U ['[', '`'] U ['{', '~'] - i'm not sure if there's a way to write this out with if's and produce less than 4 branches. (Do we have any plans on merging branches more aggressively during analysis?) Because these functions are pure, we'd hardly ever want to catch them with`evalCall()` again in another checker. Additionally, this checker's brace-initializers for function specifications are quite short - of course they're limited to very simple cases - the list of these cases can be expanded though. The checker doesn't seem to be noticeably degrading performance. Here's an example of a false positve squashed: {F203} Here `line` is taken to be "", the `line++` statement is executed at least once (by looking at the exploded graph; there's lack of "entering loop body" diagnostic piece because loop condition has complicated CFG, which is why it fails to highlight - a separate issue), and the analyzer fails to realize that isspace('\0') is false. http://reviews.llvm.org/D20811 Files: include/clang/StaticAnalyzer/Checkers/Checkers.td lib/StaticAnalyzer/Checkers/CMakeLists.txt lib/StaticAnalyzer/Checkers/LibraryFunctionsChecker.cpp test/Analysis/library-functions.c Index: test/Analysis/library-functions.c === --- /dev/null +++ test/Analysis/library-functions.c @@ -0,0 +1,109 @@ +// RUN: %clang_cc1 -analyze -analyzer-checker=unix.LibraryFunctions,debug.ExprInspection -verify %s + +void clang_analyzer_eval(int); +int glob; + +typedef struct FILE FILE; +int getc(FILE *); +#define EOF -1 +void test_getc(FILE *fp) { + int x; + while ((x = getc(fp)) != EOF) { +clang_analyzer_eval(x > 255); // expected-warning{{FALSE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } +} + +typedef unsigned long size_t; +typedef signed long ssize_t; +ssize_t write(int, const void *, size_t); +void test_write(int fd, char *buf) { + glob = 1; + ssize_t x = write(fd, buf, 10); + clang_analyzer_eval(glob); // expected-warning{{UNKNOWN}} + if (x >= 0) +clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} + else +clang_analyzer_eval(x == -1); // expected-warning{{TRUE}} +} + +size_t fread(void *, size_t, size_t, FILE *); +void test_fread(FILE *fp, int *buf) { + size_t x = fread(buf, sizeof(int), 10, fp); + clang_analyzer_eval(x <= 10); // expected-warning{{TRUE}} +} + +ssize_t getline(char **, size_t *, FILE *); +void test_getline(FILE *fp) { + char *line = 0; + size_t n = 0; + ssize_t len; + while ((len = getline(&line, &n, fp)) != -1) { +clang_analyzer_eval(len == 0); // expected-warning{{FALSE}} + } +} + +int isascii(int); +void test_isascii(int x) { + clang_analyzer_eval(isascii(123)); // expected-warning{{TRUE}} + clang_analyzer_eval(isascii(-1)); // expected-warning{{FALSE}} + if (isascii(x)) { +clang_analyzer_eval(x < 128); // expected-warning{{TRUE}} +clang_analyzer_eval(x >= 0); // expected-warning{{TRUE}} + } else { +if (x > 42) + clang_analyzer_eval(x >= 128); // expected-warning{{TRUE}} +else + clang_analyzer_eval(x < 0); // expected-warning{{TRUE}} + } + glob = 1; + isascii('a'); + clang_analyzer_eval(glob); // expected-warning{{TRUE}} +} + +int islower(int); +void test_islower(int x) { + clang_analyzer_eval(islower('x')); // expected-warning{{TRUE}} + clang_analyzer_eval(islower('X')); // expected-warning{{FALSE}} + if (islower(x)) +clang_analyzer_eval(x < 'a'); // expected-warning{{FALSE}} +} + +int getchar(void); +void test_getchar() { + int x = getchar(); + if (x == EOF) +return; + clang_analyzer_eval(x < 0); // expected-warning{{FALSE}} + clang_analyzer_eval(x < 256); // expected-warning{{TRUE}} +} + +int isalpha(int); +void test_isalpha() { + clang_analyzer_eval(isalpha(']')); // expected-warning{{FALSE}} + clang_analyzer_eval(isalpha('Q')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalpha(128)); // expected-warning{{UNKNOWN}} +} + +int isalnum(int); +void test_alnum() { + clang_analyzer_eval(isalnum('1')); // expected-warning{{TRUE}} + clang_analyzer_eval(isalnum(')')); // expected-warning{{FALSE}} +} + +int isblank(int); +void test_isblank() { + clang_analyzer_eval(isblank('\t')); // expected-warning{{TRUE}} + clang_analyzer_eval(isblank(' ')); // expected-warning{{TRUE}} + clang_analyzer_eval(isblan