[PATCH] D38797: [analyzer] CStringChecker: pr34460: Admit that some casts are hard to model.

2017-10-13 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL315742: [analyzer] CStringChecker: pr34460: Avoid a crash 
when a cast is not modeled. (authored by dergachev).

Changed prior to commit:
  https://reviews.llvm.org/D38797?vs=118597=118964#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D38797

Files:
  cfe/trunk/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
  cfe/trunk/test/Analysis/bstring.cpp
  cfe/trunk/test/Analysis/casts.c

Index: cfe/trunk/test/Analysis/casts.c
===
--- cfe/trunk/test/Analysis/casts.c
+++ cfe/trunk/test/Analysis/casts.c
@@ -123,3 +123,29 @@
   int x = (int) p;
   clang_analyzer_eval(++x < 10); // no-crash // expected-warning{{UNKNOWN}}
 }
+
+void multiDimensionalArrayPointerCasts() {
+  static int x[10][10];
+  int *y1 = &(x[3][5]);
+  char *z = ((char *) y1) + 2;
+  int *y2 = (int *)(z - 2);
+  int *y3 = ((int *)x) + 35; // This is offset for [3][5].
+
+  clang_analyzer_eval(y1 == y2); // expected-warning{{TRUE}}
+
+  // FIXME: should be FALSE (i.e. equal pointers).
+  clang_analyzer_eval(y1 - y2); // expected-warning{{UNKNOWN}}
+  // FIXME: should be TRUE (i.e. same symbol).
+  clang_analyzer_eval(*y1 == *y2); // expected-warning{{UNKNOWN}}
+
+  clang_analyzer_eval(*((char *)y1) == *((char *) y2)); // expected-warning{{TRUE}}
+
+  clang_analyzer_eval(y1 == y3); // expected-warning{{TRUE}}
+
+  // FIXME: should be FALSE (i.e. equal pointers).
+  clang_analyzer_eval(y1 - y3); // expected-warning{{UNKNOWN}}
+  // FIXME: should be TRUE (i.e. same symbol).
+  clang_analyzer_eval(*y1 == *y3); // expected-warning{{UNKNOWN}}
+
+  clang_analyzer_eval(*((char *)y1) == *((char *) y3)); // expected-warning{{TRUE}}
+}
Index: cfe/trunk/test/Analysis/bstring.cpp
===
--- cfe/trunk/test/Analysis/bstring.cpp
+++ cfe/trunk/test/Analysis/bstring.cpp
@@ -1,8 +1,35 @@
 // RUN: %clang_analyze_cc1 -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -DVARIANT -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify %s
+// RUN: %clang_analyze_cc1 -DUSE_BUILTINS -DVARIANT -analyzer-checker=core,unix.cstring,alpha.unix.cstring,debug.ExprInspection -analyzer-store=region -verify %s
 
 #include "Inputs/system-header-simulator-cxx.h"
 #include "Inputs/system-header-simulator-for-malloc.h"
 
+// This provides us with four possible mempcpy() definitions.
+// See also comments in bstring.c.
+
+#ifdef USE_BUILTINS
+#define BUILTIN(f) __builtin_##f
+#else /* USE_BUILTINS */
+#define BUILTIN(f) f
+#endif /* USE_BUILTINS */
+
+#ifdef VARIANT
+
+#define __mempcpy_chk BUILTIN(__mempcpy_chk)
+void *__mempcpy_chk(void *__restrict__ s1, const void *__restrict__ s2,
+size_t n, size_t destlen);
+
+#define mempcpy(a,b,c) __mempcpy_chk(a,b,c,(size_t)-1)
+
+#else /* VARIANT */
+
+#define mempcpy BUILTIN(mempcpy)
+void *mempcpy(void *__restrict__ s1, const void *__restrict__ s2, size_t n);
+
+#endif /* VARIANT */
+
 void clang_analyzer_eval(int);
 
 int *testStdCopyInvalidatesBuffer(std::vector v) {
@@ -36,3 +63,17 @@
 
   return buf;
 }
+
+namespace pr34460 {
+short a;
+class b {
+  int c;
+  long g;
+  void d() {
+int e = c;
+f += e;
+mempcpy(f, , g);
+  }
+  unsigned *f;
+};
+}
Index: cfe/trunk/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
===
--- cfe/trunk/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ cfe/trunk/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -1050,31 +1050,22 @@
 // If this is mempcpy, get the byte after the last byte copied and
 // bind the expr.
 if (IsMempcpy) {
-  loc::MemRegionVal destRegVal = destVal.castAs();
-
-  // Get the length to copy.
-  if (Optional lenValNonLoc = sizeVal.getAs()) {
-// Get the byte after the last byte copied.
-SValBuilder  = C.getSValBuilder();
-ASTContext  = SvalBuilder.getContext();
-QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
-loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal,
-  CharPtrTy, Dest->getType()).castAs();
-SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add,
-  DestRegCharVal,
-  *lenValNonLoc,
-  Dest->getType());
-
-// The byte after the last byte copied is the return value.
-state = state->BindExpr(CE, LCtx, lastElement);
-  } else {
-// If we don't know how much we copied, we can at least
-   

[PATCH] D38797: [analyzer] CStringChecker: pr34460: Admit that some casts are hard to model.

2017-10-11 Thread Devin Coughlin via Phabricator via cfe-commits
dcoughlin accepted this revision.
dcoughlin added a comment.
This revision is now accepted and ready to land.

This looks good to me.


https://reviews.llvm.org/D38797



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38797: [analyzer] CStringChecker: pr34460: Admit that some casts are hard to model.

2017-10-11 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ added inline comments.



Comment at: test/Analysis/casts.c:134-139
+  clang_analyzer_eval(y1 == y2); // expected-warning{{TRUE}}
+
+  // FIXME: should be FALSE (i.e. equal pointers).
+  clang_analyzer_eval(y1 - y2); // expected-warning{{UNKNOWN}}
+  // FIXME: should be TRUE (i.e. same symbol).
+  clang_analyzer_eval(*y1 == *y2); // expected-warning{{UNKNOWN}}

Here we have `y1` pointing to `element{element{x, 3}, 5}` vs. both `y2` and 
`y3` pointing to `element{x, 35}`. While `y3` is inevitably looking like that, 
`y2` could have been same as `y1` if we didn't unwrap multi-dimensional array 
elements.


https://reviews.llvm.org/D38797



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D38797: [analyzer] CStringChecker: pr34460: Admit that some casts are hard to model.

2017-10-11 Thread Artem Dergachev via Phabricator via cfe-commits
NoQ created this revision.
Herald added a subscriber: szepet.

In https://bugs.llvm.org/show_bug.cgi?id=34460 CStringChecker tries to 
`evalCast()` a memory region from `void *` to `char *` for the purposes of 
modeling `mempcpy()`. The memory region turned out to be an element region of 
type `unsigned int` with a symbolic index. For such regions, even such trivial 
casts have turned out to be unsupported (modeled as `UnknownVal`), and the 
checker crashed upon asserting them to be supported.

I tried to get such trivial cast working for a few days and ended up believing 
that it not worth the effort because of:

1. How we represent casts in the SVal hierarchy.
  - In our case, the region is an ElementRegion with element type `unsigned 
int`, which //essentially// represents a pointer of type `unsigned int *`, not 
`void *`. It means that the cast is definitely not a no-op; the region needs to 
be modified.
  - Now, if the offset was concrete, we already try to replace ElementRegion 
with another ElementRegion of the other type, and recompute element index 
accordingly. It is only possible if byte offset of the region is divisible by 
size of the target type (in our case it is because the target type is `char`). 
If it's not divisible, we make two ElementRegions on top of each other, first 
with element-type `char` to represent a raw offset, other of the target type to 
represent the cast. It means that for symbolic index case, we need to account 
for being able to receive such two-element-regions construct in the first 
place, and then investigate divisibility of a linear expression of form `a*N + 
b`, where `N` is concrete but `a` and `b` are possibly symbolic, to  a concrete 
type size, which our range constraint manager would never handle.
  - In fact, now we not only unpack the top one or two ElementRegions while 
computing the offset (that needs to be divisible), but also we unpack all other 
top-level element regions (i.e. if we look into a multi-dimensional array; see 
`ElementRegion::getAsArrayOffset()`). This was never tested; i added relevant 
tests into `casts.c` in this patch to demonstrate the problems caused by that 
(which are unrelated to the rest of the patch). I believe that unpacking 
multi-dimensional arrays in this way is not ideal; however, i also added tests 
that would fail if this behavior is disabled. This needs much more work. Of 
course, this work would be even harder if we intend to support symbolic offsets.
2. How we split the responsibilities between entities. When trying to implement 
the plan described in 1., i ended up moving a lot of code around and passing 
ProgramState through dozens of APIs. We need the state to perform binary 
operations on `SVal` objects through `SValBuilder`. Originally, `castRegion()` 
resides in `RegionStore` which is not intended to access the program state. It 
is trivial to move `castRegion` to `SimpleSValBuilder`, but passing `State` 
into it results in a huge cascade of changes in `SValBuilder` method arguments. 
I have given up when i had to pass `State` to 
`SValBuilder::getConstantVal(const Expr *);`, which seemed ridiculous (it's 
constant, why does it depend on the state?). Introducing all this complexity 
only to compute an `SVal` that nobody would be able to understand was not worth 
the effort, in my opinion.

Hence the quick fix. Code slightly simplified to use a more high-level API.


https://reviews.llvm.org/D38797

Files:
  lib/StaticAnalyzer/Checkers/CStringChecker.cpp
  test/Analysis/bstring.cpp
  test/Analysis/casts.c

Index: test/Analysis/casts.c
===
--- test/Analysis/casts.c
+++ test/Analysis/casts.c
@@ -123,3 +123,29 @@
   int x = (int) p;
   clang_analyzer_eval(++x < 10); // no-crash // expected-warning{{UNKNOWN}}
 }
+
+void multiDimensionalArrayPointerCasts() {
+  static int x[10][10];
+  int *y1 = &(x[3][5]);
+  char *z = ((char *) y1) + 2;
+  int *y2 = (int *)(z - 2);
+  int *y3 = ((int *)x) + 35; // This is offset for [3][5].
+
+  clang_analyzer_eval(y1 == y2); // expected-warning{{TRUE}}
+
+  // FIXME: should be FALSE (i.e. equal pointers).
+  clang_analyzer_eval(y1 - y2); // expected-warning{{UNKNOWN}}
+  // FIXME: should be TRUE (i.e. same symbol).
+  clang_analyzer_eval(*y1 == *y2); // expected-warning{{UNKNOWN}}
+
+  clang_analyzer_eval(*((char *)y1) == *((char *) y2)); // expected-warning{{TRUE}}
+
+  clang_analyzer_eval(y1 == y3); // expected-warning{{TRUE}}
+
+  // FIXME: should be FALSE (i.e. equal pointers).
+  clang_analyzer_eval(y1 - y3); // expected-warning{{UNKNOWN}}
+  // FIXME: should be TRUE (i.e. same symbol).
+  clang_analyzer_eval(*y1 == *y3); // expected-warning{{UNKNOWN}}
+
+  clang_analyzer_eval(*((char *)y1) == *((char *) y3)); // expected-warning{{TRUE}}
+}
Index: test/Analysis/bstring.cpp
===
--- test/Analysis/bstring.cpp
+++ test/Analysis/bstring.cpp
@@ -1,8 +1,35