Author: Shivam
Date: 2022-03-03T23:21:26+05:30
New Revision: bd1917c88a32c0930864d04f4e71155dcc3fa592

URL: 
https://github.com/llvm/llvm-project/commit/bd1917c88a32c0930864d04f4e71155dcc3fa592
DIFF: 
https://github.com/llvm/llvm-project/commit/bd1917c88a32c0930864d04f4e71155dcc3fa592.diff

LOG: [analyzer] Done some changes to detect Uninitialized read by the char 
array manipulation functions

Few weeks back I was experimenting with reading the uninitialized values from 
src , which is actually a bug but the CSA seems to give up at that point . I 
was curious about that and I pinged @steakhal on the discord and according to 
him this seems to be a genuine issue and needs to be fix. So I goes with fixing 
this bug and thanks to @steakhal who help me creating this patch. This feature 
seems to break some tests but this was the genuine problem and the broken tests 
also needs to fix in certain manner. I add a test but yeah we need more 
tests,I'll try to add more tests.Thanks

Reviewed By: steakhal, NoQ

Differential Revision: https://reviews.llvm.org/D120489

Added: 
    clang/test/Analysis/bstring_UninitRead.c

Modified: 
    clang/docs/analyzer/checkers.rst
    clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
    clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
    clang/test/Analysis/bstring.c

Removed: 
    


################################################################################
diff  --git a/clang/docs/analyzer/checkers.rst 
b/clang/docs/analyzer/checkers.rst
index a42e3e6960777..a9ebe063c6c8b 100644
--- a/clang/docs/analyzer/checkers.rst
+++ b/clang/docs/analyzer/checkers.rst
@@ -2625,13 +2625,44 @@ alpha.unix.cstring.OutOfBounds (C)
 """"""""""""""""""""""""""""""""""
 Check for out-of-bounds access in string functions; applies to:`` strncopy, 
strncat``.
 
-
 .. code-block:: c
 
  void test() {
    int y = strlen((char *)&test); // warn
  }
 
+.. _alpha-unix-cstring-UninitializedRead:
+
+alpha.unix.cstring.UninitializedRead (C)
+""""""""""""""""""""""""""""""""""""""""
+Check for uninitialized reads from common memory copy/manipulation functions 
such as:
+ ``memcpy, mempcpy, memmove, memcmp, strcmp, strncmp, strcpy, strlen, strsep`` 
and many more.
+
+.. code-block:: c 
+
+ void test() {
+  char src[10];
+  char dst[5];
+  memcpy(dst,src,sizeof(dst)); // warn: Bytes string function accesses 
uninitialized/garbage values
+ }
+
+Limitations:
+  
+   - Due to limitations of the memory modeling in the analyzer, one can likely
+     observe a lot of false-positive reports like this:
+      .. code-block:: c
+  
+        void false_positive() {
+          int src[] = {1, 2, 3, 4};
+          int dst[5] = {0};
+          memcpy(dst, src, 4 * sizeof(int)); // false-positive:
+          // The 'src' buffer was correctly initialized, yet we cannot conclude
+          // that since the analyzer could not see a direct initialization of 
the
+          // very last byte of the source buffer.
+        }
+  
+     More details at the corresponding `GitHub issue 
<https://github.com/llvm/llvm-project/issues/43459>`_.
+  
 .. _alpha-nondeterminism-PointerIteration:
 
 alpha.nondeterminism.PointerIteration (C++)

diff  --git a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td 
b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
index 6b72d64950106..9340a114ac456 100644
--- a/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
+++ b/clang/include/clang/StaticAnalyzer/Checkers/Checkers.td
@@ -475,7 +475,12 @@ def CStringNotNullTerm : Checker<"NotNullTerminated">,
   HelpText<"Check for arguments which are not null-terminating strings">,
   Dependencies<[CStringModeling]>,
   Documentation<HasAlphaDocumentation>;
-
+ 
+def CStringUninitializedRead : Checker<"UninitializedRead">,
+  HelpText<"Checks if the string manipulation function would read 
uninitialized bytes">,
+  Dependencies<[CStringModeling]>,
+  Documentation<HasAlphaDocumentation>;
+  
 } // end "alpha.unix.cstring"
 
 let ParentPackage = Unix in {

diff  --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp 
b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
index 8483a1a47cea1..54c9e887b8c8c 100644
--- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp
@@ -80,7 +80,7 @@ class CStringChecker : public Checker< eval::Call,
                                          check::RegionChanges
                                          > {
   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
-      BT_NotCString, BT_AdditionOverflow;
+      BT_NotCString, BT_AdditionOverflow, BT_UninitRead;
 
   mutable const char *CurrentFunctionDescription;
 
@@ -92,11 +92,13 @@ class CStringChecker : public Checker< eval::Call,
     DefaultBool CheckCStringOutOfBounds;
     DefaultBool CheckCStringBufferOverlap;
     DefaultBool CheckCStringNotNullTerm;
+    DefaultBool CheckCStringUninitializedRead;
 
     CheckerNameRef CheckNameCStringNullArg;
     CheckerNameRef CheckNameCStringOutOfBounds;
     CheckerNameRef CheckNameCStringBufferOverlap;
     CheckerNameRef CheckNameCStringNotNullTerm;
+    CheckerNameRef CheckNameCStringUninitializedRead;
   };
 
   CStringChecksFilter Filter;
@@ -367,6 +369,15 @@ ProgramStateRef 
CStringChecker::CheckLocation(CheckerContext &C,
     return nullptr;
   }
 
+  // Ensure that we wouldn't read uninitialized value.
+  if (Access == AccessKind::read) {
+    if (Filter.CheckCStringUninitializedRead &&
+        StInBound->getSVal(ER).isUndef()) {
+      emitUninitializedReadBug(C, StInBound, Buffer.Expression);
+      return nullptr;
+    }
+  }
+
   // Array bound check succeeded.  From this point forward the array bound
   // should always succeed.
   return StInBound;
@@ -578,6 +589,26 @@ void CStringChecker::emitNullArgBug(CheckerContext &C, 
ProgramStateRef State,
   }
 }
 
+void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
+                                              ProgramStateRef State,
+                                              const Expr *E) const {
+  if (ExplodedNode *N = C.generateErrorNode(State)) {
+    const char *Msg =
+        "Bytes string function accesses uninitialized/garbage values";
+    if (!BT_UninitRead)
+      BT_UninitRead.reset(
+          new BuiltinBug(Filter.CheckNameCStringUninitializedRead,
+                         "Accessing unitialized/garbage values", Msg));
+
+    BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get());
+
+    auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
+    Report->addRange(E->getSourceRange());
+    bugreporter::trackExpressionValue(N, E, *Report);
+    C.emitReport(std::move(Report));
+  }
+}
+
 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
                                         ProgramStateRef State, const Stmt *S,
                                         StringRef WarningMsg) const {
@@ -2458,3 +2489,4 @@ REGISTER_CHECKER(CStringNullArg)
 REGISTER_CHECKER(CStringOutOfBounds)
 REGISTER_CHECKER(CStringBufferOverlap)
 REGISTER_CHECKER(CStringNotNullTerm)
+REGISTER_CHECKER(CStringUninitializedRead)
\ No newline at end of file

diff  --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c
index c88452e49075d..a7c7bdb23683e 100644
--- a/clang/test/Analysis/bstring.c
+++ b/clang/test/Analysis/bstring.c
@@ -2,13 +2,15 @@
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
-// RUN:   -analyzer-config eagerly-assume=false
+// RUN:   -analyzer-config eagerly-assume=false  
 //
 // RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false
 //
@@ -16,6 +18,7 @@
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false
 //
@@ -23,6 +26,7 @@
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=unix.cstring \
 // RUN:   -analyzer-checker=alpha.unix.cstring \
+// RUN:   -analyzer-disable-checker=alpha.unix.cstring.UninitializedRead \
 // RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config eagerly-assume=false
 
@@ -315,7 +319,7 @@ void mempcpy15(void) {
 
   p1 = (&s2) + 1;
   p2 = mempcpy(&s2, &s1, sizeof(struct st));
-
+  
   clang_analyzer_eval(p1 == p2); // expected-warning{{TRUE}}
 }
 

diff  --git a/clang/test/Analysis/bstring_UninitRead.c 
b/clang/test/Analysis/bstring_UninitRead.c
new file mode 100644
index 0000000000000..c535e018e62c2
--- /dev/null
+++ b/clang/test/Analysis/bstring_UninitRead.c
@@ -0,0 +1,59 @@
+// RUN: %clang_analyze_cc1 -verify %s \
+// RUN: -analyzer-checker=core,alpha.unix.cstring
+
+
+// This file is generally for the alpha.unix.cstring.UninitializedRead 
Checker, the reason for putting it into
+// the separate file because the checker is break the some existing test cases 
in bstring.c file , so we don't 
+// wanna mess up with some existing test case so it's better to create 
separate file for it, this file also include 
+// the broken test for the reference in future about the broken tests.
+
+
+typedef typeof(sizeof(int)) size_t;
+
+void clang_analyzer_eval(int);
+
+void *memcpy(void *restrict s1, const void *restrict s2, size_t n);
+
+void top(char *dst) {
+  char buf[10];
+  memcpy(dst, buf, 10); // expected-warning{{Bytes string function accesses 
uninitialized/garbage values}}
+  (void)buf;
+}
+
+//===----------------------------------------------------------------------===
+// mempcpy()
+//===----------------------------------------------------------------------===
+
+void *mempcpy(void *restrict s1, const void *restrict s2, size_t n);
+
+void mempcpy14() {
+  int src[] = {1, 2, 3, 4};
+  int dst[5] = {0};
+  int *p;
+
+  p = mempcpy(dst, src, 4 * sizeof(int)); // expected-warning{{Bytes string 
function accesses uninitialized/garbage values}}
+   // FIXME: This behaviour is actually surprising and needs to be fixed, 
+   // mempcpy seems to consider the very last byte of the src buffer 
uninitialized
+   // and returning undef unfortunately. It should have returned unknown or a 
conjured value instead.
+
+  clang_analyzer_eval(p == &dst[4]); // no-warning (above is fatal)
+}
+
+struct st {
+  int i;
+  int j;
+};
+
+
+void mempcpy15() {
+  struct st s1 = {0};
+  struct st s2;
+  struct st *p1;
+  struct st *p2;
+
+  p1 = (&s2) + 1;
+  p2 = mempcpy(&s2, &s1, sizeof(struct st)); // expected-warning{{Bytes string 
function accesses uninitialized/garbage values}}
+  // FIXME: It seems same as mempcpy14() case.
+  
+  clang_analyzer_eval(p1 == p2); // no-warning (above is fatal)
+}


        
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to