steakhal updated this revision to Diff 286318.
steakhal marked 4 inline comments as done.
steakhal retitled this revision from "[analyzer][RFC] Handle pointer difference 
of ElementRegion and SymbolicRegion" to "[analyzer] Handle pointer difference 
of ElementRegion and SymbolicRegion".
steakhal edited the summary of this revision.
steakhal added a comment.

- Refined documentation comments as noted.
- Added tests.
- Removed the complicated `ByteOffsetOfElement` lambda.
- Rename revision.

---

Before this patch, only these reported `Unknown` instead of the currently 
expected value.
Besides that all tests passed as-is on master:

  clang_analyzer_dump_int(p - p0);       // expected-warning {{0 S32b}}
  clang_analyzer_dump_int(p - p1);       // expected-warning {{-1 S32b}}
  clang_analyzer_dump_int(p - pn);       // expected-warning-re {{0 - 
(reg_${{[0-9]+}}<int n>)}}
  clang_analyzer_dump_int((p + 1) - p);  // expected-warning {{1 S32b}}
  
  // Swapped operands:
  clang_analyzer_dump_int(p0 - p);       // expected-warning {{0 S32b}}
  clang_analyzer_dump_int(p1 - p);       // expected-warning {{1 S32b}}
  clang_analyzer_dump_int(pn - p);       // expected-warning-re 
{{reg_${{[0-9]+}}<int n>}}
  clang_analyzer_dump_int(p - (p + 1));  // expected-warning {{-1 S32b}}



---

Further notes:
Element{X, n, Ty1} and Element{X, m, Ty2} should compare equal if and only if 
the `n * sizeof(Ty1)` equals to `n * sizeof(Ty2)`.
However, previously it did not take the size of the types into account (there 
is the corresponding FIXIT).
I'm not fixing this either for now.

The analyzer returns `Unknown` for this call:
clang_analyzer_dump_int((p + 1) - q);
However, IMO it should hold the expression 'p+1+q' instead - regardless of `p` 
alias (or not) the same memory region of `p`
There is a FIXME in the testcode for this.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84736/new/

https://reviews.llvm.org/D84736

Files:
  clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
  clang/test/Analysis/pointer-arithmetic.c

Index: clang/test/Analysis/pointer-arithmetic.c
===================================================================
--- clang/test/Analysis/pointer-arithmetic.c
+++ clang/test/Analysis/pointer-arithmetic.c
@@ -1,4 +1,9 @@
-// RUN: %clang_analyze_cc1 -analyzer-checker=core -verify %s
+// RUN: %clang_analyze_cc1 -analyzer-checker=core,debug.ExprInspection \
+// RUN:   -analyzer-config eagerly-assume=false -verify %s
+
+void clang_analyzer_eval(int);
+void clang_analyzer_dump_ptr(int *);
+void clang_analyzer_dump_int(int);
 
 int test1() {
   int *p = (int *)sizeof(int);
@@ -28,3 +33,88 @@
   p += 1;
   return *p; // expected-warning {{Dereference of null pointer}}
 }
+
+void simplify_symregion_and_elementregion_pointer_arithmetic_and_comparison(int *p, int n, int m, int *q) {
+  // 'q' is SymReg{q}
+  // 'p' is SymReg{p}
+  int *p1 = p + 1;  // Element{p,1}
+  int *p0 = p1 - 1; // Element{p,0}
+  int *pn = p + n;  // Element{p,n}
+  int *pm = p + m;  // Element{p,m}
+
+  clang_analyzer_dump_ptr(q);
+  clang_analyzer_dump_ptr(p);
+  clang_analyzer_dump_ptr(p0);
+  clang_analyzer_dump_ptr(p1);
+  clang_analyzer_dump_ptr(pn);
+  clang_analyzer_dump_ptr(pm);
+  // expected-warning-re@-6 {{&SymRegion{reg_${{[0-9]+}}<int * q>}}}
+  // expected-warning-re@-6 {{&SymRegion{reg_${{[0-9]+}}<int * p>}}}
+  // expected-warning-re@-6 {{&Element{SymRegion{reg_${{[0-9]+}}<int * p>},0 S64b,int}}}}
+  // expected-warning-re@-6 {{&Element{SymRegion{reg_${{[0-9]+}}<int * p>},1 S64b,int}}}
+  // expected-warning-re@-6 {{&Element{SymRegion{reg_${{[0-9]+}}<int * p>},reg_${{[0-9]+}}<int n>,int}}}
+  // expected-warning-re@-6 {{&Element{SymRegion{reg_${{[0-9]+}}<int * p>},reg_${{[0-9]+}}<int m>,int}}}
+
+  // Test the equality operator:
+  clang_analyzer_eval(p == p0);  // expected-warning {{TRUE}}
+  clang_analyzer_eval(p == p1);  // expected-warning {{FALSE}}
+  clang_analyzer_eval(p1 == pn); // expected-warning {{UNKNOWN}}
+  clang_analyzer_eval(pn == pm); // expected-warning {{UNKNOWN}}
+
+  // Reverse operands:
+  clang_analyzer_eval(p0 == p);  // expected-warning {{TRUE}}
+  clang_analyzer_eval(p1 == p);  // expected-warning {{FALSE}}
+  clang_analyzer_eval(pn == p1); // expected-warning {{UNKNOWN}}
+  clang_analyzer_eval(pm == pn); // expected-warning {{UNKNOWN}}
+
+  // Test the inequality operator:
+  clang_analyzer_eval(p != p0);  // expected-warning {{FALSE}}
+  clang_analyzer_eval(p != p1);  // expected-warning {{TRUE}}
+  clang_analyzer_eval(p1 != pn); // expected-warning {{UNKNOWN}}
+  clang_analyzer_eval(pn != pm); // expected-warning {{UNKNOWN}}
+
+  // Reverse operands:
+  clang_analyzer_eval(p0 != p);  // expected-warning {{FALSE}}
+  clang_analyzer_eval(p1 != p);  // expected-warning {{TRUE}}
+  clang_analyzer_eval(pn != p1); // expected-warning {{UNKNOWN}}
+  clang_analyzer_eval(pm != pn); // expected-warning {{UNKNOWN}}
+
+  // Test the subtraction operator:
+  clang_analyzer_dump_int(p - q);        // expected-warning-re {{(reg_${{[0-9]+}}<int * p>) - (reg_${{[0-9]+}}<int * q>)}}
+  clang_analyzer_dump_int(p - p);        // expected-warning {{0 S32b}}
+  clang_analyzer_dump_int(p - p0);       // expected-warning {{0 S32b}}
+  clang_analyzer_dump_int(p - p1);       // expected-warning {{-1 S32b}}
+  clang_analyzer_dump_int(p - pn);       // expected-warning-re {{0 - (reg_${{[0-9]+}}<int n>)}}
+  clang_analyzer_dump_int((p + 1) - q);  // expected-warning {{Unknown}} // FIXME: Might point to the same region, we should hold the expression 'p+1+q' instead.
+  clang_analyzer_dump_int((p + 1) - p);  // expected-warning {{1 S32b}}
+  clang_analyzer_dump_int((p + 1) - p0); // expected-warning {{1 S32b}}
+  clang_analyzer_dump_int((p + 1) - p1); // expected-warning {{0 S32b}}
+  clang_analyzer_dump_int((p + 1) - pn); // expected-warning-re {{1 - (reg_${{[0-9]+}}<int n>)}}
+
+  // Reverse operands:
+  clang_analyzer_dump_int(q - p);        // expected-warning-re {{(reg_${{[0-9]+}}<int * q>) - (reg_${{[0-9]+}}<int * p>)}}
+  clang_analyzer_dump_int(p - p);        // expected-warning {{0 S32b}}
+  clang_analyzer_dump_int(p0 - p);       // expected-warning {{0 S32b}}
+  clang_analyzer_dump_int(p1 - p);       // expected-warning {{1 S32b}}
+  clang_analyzer_dump_int(pn - p);       // expected-warning-re {{reg_${{[0-9]+}}<int n>}}
+  clang_analyzer_dump_int(q - (p + 1));  // expected-warning {{Unknown}} // FIXME: Might point to the same region, we should hold the expression 'p+1+q' instead.
+  clang_analyzer_dump_int(p - (p + 1));  // expected-warning {{-1 S32b}}
+  clang_analyzer_dump_int(p0 - (p + 1)); // expected-warning {{-1 S32b}}
+  clang_analyzer_dump_int(p1 - (p + 1)); // expected-warning {{0 S32b}}
+  clang_analyzer_dump_int(pn - (p + 1)); // expected-warning-re {{(reg_${{[0-9]+}}<int n>) - 1}}
+}
+
+void clang_analyzer_dump_ptrarray(int (*p)[10]);
+void test_arrays(int (*p)[10]) {
+  int(*pp)[10] = p + 2;
+  clang_analyzer_dump_ptrarray(p);  // expected-warning-re {{&SymRegion{reg_${{[0-9]+}}<int (*)[10] p>}}}
+  clang_analyzer_dump_ptrarray(pp); // expected-warning-re {{&Element{SymRegion{reg_${{[0-9]+}}<int (*)[10] p>},2 S64b,int [10]}}}
+
+  // Assuming a casual x86 architecture:
+  int *q = (int *)p;
+  int *qq = q + 10 * 2;
+  clang_analyzer_dump_ptr(q);              // expected-warning-re {{&Element{SymRegion{reg_${{[0-9]+}}<int (*)[10] p>},0 S64b,int}}}
+  clang_analyzer_dump_ptr(qq);             // expected-warning-re {{&Element{SymRegion{reg_${{[0-9]+}}<int (*)[10] p>},20 S64b,int}}}
+  clang_analyzer_dump_ptrarray(pp);        // expected-warning-re {{&Element{SymRegion{reg_${{[0-9]+}}<int (*)[10] p>},2 S64b,int [10]}}}
+  clang_analyzer_dump_int(qq - (int *)pp); // expected-warning {{0 S32b}}
+}
Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
===================================================================
--- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
+++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp
@@ -1017,40 +1017,77 @@
       }
     }
 
-    // Handle special cases for when both regions are element regions.
-    const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
-    const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
-    if (RightER && LeftER) {
-      // Next, see if the two ERs have the same super-region and matching types.
-      // FIXME: This should do something useful even if the types don't match,
-      // though if both indexes are constant the RegionRawOffset path will
-      // give the correct answer.
-      if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
-          LeftER->getElementType() == RightER->getElementType()) {
-        // Get the left index and cast it to the correct type.
-        // If the index is unknown or undefined, bail out here.
-        SVal LeftIndexVal = LeftER->getIndex();
-        Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
-        if (!LeftIndex)
-          return UnknownVal();
-        LeftIndexVal = evalCastFromNonLoc(*LeftIndex, ArrayIndexTy);
-        LeftIndex = LeftIndexVal.getAs<NonLoc>();
-        if (!LeftIndex)
+    // We simplify the binary expressions of the following forms, by evaluating
+    // the operator.
+    // \forall MemRegion X, \forall NonLoc n, m:
+    //  - Element{X,n} OP Element{X,m}
+    //  - Element{X,n} OP X
+    //  -            X OP Element{X,n}
+    // Where the OP is an equality or subtraction operator. Eg:
+    //   - Element{X,n} - Element{X,m} => n-m
+    //   - Element{X,0} == X           => true
+    //   - Element{X,1} == X           => false
+    // We don't simplify the nested ElementRegions here, such as:
+    // Element{Element{x,3,int [10]},5,int}  ==  Element{x,35,int}
+    {
+      // For a situation, where `a` and `b` are memory regions, and `OP` is an
+      // equality operator, we can infer the result for known `Index` values. We
+      // are able to do this because:
+      //  - If we check for equality:
+      //    The answer is `true` if and only if both regions are the same and
+      //    the `Index` is zero (so the ElementRegion refers to the same item),
+      //    `false` otherwise.
+      //  - If we check for inequality:
+      //    The answer is `true` if and only if either the regions are different
+      //    or the `Index` is known to be non-zero.
+      const auto EvaluateEqualityOperators =
+          [this, state, op, resultTy](NonLoc Index,
+                                      bool HasSameMemRegions) -> SVal {
+        assert(BinaryOperator::isEqualityOp(op));
+        const llvm::APSInt *Int = getKnownValue(state, Index);
+        if (!Int)
           return UnknownVal();
 
-        // Do the same for the right index.
-        SVal RightIndexVal = RightER->getIndex();
-        Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
-        if (!RightIndex)
-          return UnknownVal();
-        RightIndexVal = evalCastFromNonLoc(*RightIndex, ArrayIndexTy);
-        RightIndex = RightIndexVal.getAs<NonLoc>();
-        if (!RightIndex)
-          return UnknownVal();
+        const bool EQResult = HasSameMemRegions && *Int == 0;
+        return makeTruthVal(op == BO_EQ ? EQResult : !EQResult, resultTy);
+      };
+
+      const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
+      const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
+      if (RightER && LeftER) {
+        // Next, see if the two ERs have the same super-region and matching
+        // types.
+        // FIXME: This should do something useful even if the types don't match,
+        // though if both indexes are constant the RegionRawOffset path will
+        // give the correct answer.
+        if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
+            LeftER->getElementType() == RightER->getElementType()) {
+          return evalBinOpNN(state, op, LeftER->getIndex(), RightER->getIndex(),
+                             resultTy);
+        }
+      } else if (LeftER && !RightER) {
+        NonLoc LeftIndex = LeftER->getIndex();
+        const bool HasSameMemRegions = LeftER->getSuperRegion() == RightMR;
+
+        if (BinaryOperator::isEqualityOp(op))
+          return EvaluateEqualityOperators(LeftIndex, HasSameMemRegions);
 
-        // Actually perform the operation.
-        // evalBinOpNN expects the two indexes to already be the right type.
-        return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
+        if (op == BO_Sub && HasSameMemRegions)
+          return LeftIndex;
+        return UnknownVal();
+      } else if (!LeftER && RightER) {
+        NonLoc RightIndex = RightER->getIndex();
+        const bool HasSameMemRegions = LeftMR == RightER->getSuperRegion();
+
+        if (BinaryOperator::isEqualityOp(op))
+          return EvaluateEqualityOperators(RightIndex, HasSameMemRegions);
+
+        // FIXME: Consider refactoring evalMinus function to accept the State
+        // as well, and use it here.
+        if (op == BO_Sub && HasSameMemRegions)
+          return evalBinOpNN(state, BO_Sub, makeZeroArrayIndex(), RightIndex,
+                             ArrayIndexTy);
+        return UnknownVal();
       }
     }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to