[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-03-07 Thread Endre Fülöp via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4fd6c6e65ab5: [analyzer] Add more propagations to Taint 
analysis (authored by gamesh411).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120369/new/

https://reviews.llvm.org/D120369

Files:
  clang/docs/analyzer/checkers.rst
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -1,20 +1,26 @@
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -DFILE_IS_STRUCT \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
@@ -24,8 +30,10 @@
 // CHECK-INVALID-FILE-SAME:that expects a valid filename instead of
 // CHECK-INVALID-FILE-SAME:'justguessit'
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \
 // RUN:   2>&1 | FileCheck -DMSG=%errc_EINVAL %s -check-prefix=CHECK-ILL-FORMED
@@ -34,8 +42,10 @@
 // CHECK-ILL-FORMED-SAME:'alpha.security.taint.TaintPropagation:Config',
 // CHECK-ILL-FORMED-SAME:that expects a valid yaml file: [[MSG]]
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG
@@ -46,6 +56,9 @@
 // CHECK-INVALID-ARG-SAME:rules greater or equal to -1
 
 typedef long long rsize_t;
+void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_charp(char*);
+void clang_analyzer_isTainted_int(int);
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
@@ -60,13 +73,18 @@
 #endif
 
 #define bool _Bool
+#define NULL (void*)0
 
 char *getenv(const char *name);
+
+FILE *fopen(const char *name, const char *mode);
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
 void setproctitle_init(int argc, char *argv[], char *envp[]);
 typedef __typeof(sizeof(int)) size_t;
+typedef signed long long ssize_t;
 
 // Define string functions. Use builtin for some of them. They all default to
 // the processing in the taint checker.
@@ -388,7 +406,6 @@
   return system(c); // expected-warning {{Untrusted data is passed to a system call}}
 }
 
-typedef signed long long ssize_t;
 ssize_t readlink(const char *path, char *buf, size_t bufsiz);
 int testReadlink(char *path, char *buf, size_t bufsiz) {
   ssize_t s = readlink(path, buf, bufsiz);
@@ -463,6 +480,510 @@
   return system(buf); // expected-warning {{Untrusted data is passed to a system call}}
 }
 
+int fscanf_s(FILE *stream, const char *format, ...);
+void testFscanf_s(const char *fname, int *d) {
+  FILE *f = fopen(fname, "r");
+  fscanf_s(f, "%d", d);
+  cla

[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-03-07 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 updated this revision to Diff 413388.
gamesh411 added a comment.
Herald added a project: All.

rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120369/new/

https://reviews.llvm.org/D120369

Files:
  clang/docs/analyzer/checkers.rst
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -1,20 +1,26 @@
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -DFILE_IS_STRUCT \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
@@ -24,8 +30,10 @@
 // CHECK-INVALID-FILE-SAME:that expects a valid filename instead of
 // CHECK-INVALID-FILE-SAME:'justguessit'
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \
 // RUN:   2>&1 | FileCheck -DMSG=%errc_EINVAL %s -check-prefix=CHECK-ILL-FORMED
@@ -34,8 +42,10 @@
 // CHECK-ILL-FORMED-SAME:'alpha.security.taint.TaintPropagation:Config',
 // CHECK-ILL-FORMED-SAME:that expects a valid yaml file: [[MSG]]
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG
@@ -46,6 +56,9 @@
 // CHECK-INVALID-ARG-SAME:rules greater or equal to -1
 
 typedef long long rsize_t;
+void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_charp(char*);
+void clang_analyzer_isTainted_int(int);
 
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
@@ -60,13 +73,18 @@
 #endif
 
 #define bool _Bool
+#define NULL (void*)0
 
 char *getenv(const char *name);
+
+FILE *fopen(const char *name, const char *mode);
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
 void setproctitle_init(int argc, char *argv[], char *envp[]);
 typedef __typeof(sizeof(int)) size_t;
+typedef signed long long ssize_t;
 
 // Define string functions. Use builtin for some of them. They all default to
 // the processing in the taint checker.
@@ -388,7 +406,6 @@
   return system(c); // expected-warning {{Untrusted data is passed to a system call}}
 }
 
-typedef signed long long ssize_t;
 ssize_t readlink(const char *path, char *buf, size_t bufsiz);
 int testReadlink(char *path, char *buf, size_t bufsiz) {
   ssize_t s = readlink(path, buf, bufsiz);
@@ -463,6 +480,510 @@
   return system(buf); // expected-warning {{Untrusted data is passed to a system call}}
 }
 
+int fscanf_s(FILE *stream, const char *format, ...);
+void testFscanf_s(const char *fname, int *d) {
+  FILE *f = fopen(fname, "r");
+  fscanf_s(f, "%d", d);
+  clang_analyzer_isTainted_int(*d); // expected-warning {{YES}}
+}
+
+int fr

[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-03-01 Thread Balázs Benics via Phabricator via cfe-commits
steakhal accepted this revision.
steakhal added a reviewer: NoQ.
steakhal added a comment.
This revision is now accepted and ready to land.

Looks good to me. Check the docs if they are still in sync.
Also, postpone this for two days to let others block this if they have 
objections.
Other than that, land it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120369/new/

https://reviews.llvm.org/D120369

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-03-01 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 added inline comments.



Comment at: clang/docs/analyzer/checkers.rst:2361
 Default propagations defined by ``GenericTaintChecker``:
-``atoi``, ``atol``, ``atoll``, ``fgetc``, ``fgetln``, ``fgets``, ``fscanf``, 
``sscanf``, ``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, 
``pread``, ``read``, ``strchr``, ``strrchr``, ``tolower``, ``toupper``
+``atoi``, ``atol``, ``atoll``, ``basename``, ``dirname``, ``fgetc``, 
``fgetln``, ``fgets``, ``fnmatch``, ``fread``, ``fscanf``, ``fscanf_s``, 
``index``, ``inflate``, ``isalnum``, ``isalpha``, ``isascii``, ``isblank``, 
``iscntrl``, ``isdigit``, ``isgraph``, ``islower``, ``isprint``, ``ispunct``, 
``isspace``, ``isupper``, ``isxdigit``, ``memchr``, ``memrchr``, ``sscanf``, 
``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, ``memcmp``, 
``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``, 
``qsort_r``, ``rawmemchr``, ``read``, ``readv``, ``recv``, ``recvfrom``, 
``rindex``, ``strcasestr``, ``strchr``, ``strchrnul``,  ``strcasecmp``, 
``strcmp``, ``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``, 
``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, ``strstr``, 
``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``, ``toupper``, 
``ttyname``, ``ttyname_r``, ``vfscanf``, ``vfscanf``, ``wctomb``, ``wcwidth``
 

steakhal wrote:
> I cannot see the corresponding propagation rule.
> That being said, it would be handy to mention that this is for `zlib` 
> decompression and this should be probably a taint source anyway.
> 
> `vfscanf` occurs two times.
> 
> `vscanf` is not mentioned here; and there are probably a couple others like 
> this.
Removed `vscanf` and `vfscanf` as modeling their taint is not straightforward 
and should be done in another checker.
The problem with those is that they do not use variadic arguments, but an 
abstraction of those implemented by the type `va_list`, which is used to 
support invocations, where the number of arguments is determined at runtime.
In addition to modeling individual v-variants we will also have to handle the 
creation of `va_list` objects, and `va_start` function calls and try to reason 
about which parameters will be tainted.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:561
+  {{"vscanf"}, TR::Prop({{0}}, {{}, 1})},
+  {{"vfscanf"}, TR::Prop({{0}}, {{}, 2})},
+

steakhal wrote:
> This function has exactly 3 arguments.
> I'm also puzzled how tainting `va_list` will work out. That should be modeled 
> separately IMO.
> This comment applies to all of the similar `va_list` accepting functions, 
> such as `vscanf`, `vfscanf`, and possibly others.
> 
> That being said, I think `vscanf` is more like `scanf`, so it should be 
> modeled as a taint source instead of a propagator.
removed



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:575
+  {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
+  {{"readv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+  {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

steakhal wrote:
> I'm on board with marking read operations as props/sources.
> Let's look at the declaration: `ssize_t readv(int fd, const struct iovec 
> *iov, int iovcnt);`
> I'm not sure how could the pointee of `iov` be modified by this call, as its 
> `const`.
> Additionally, I doubt the effectiveness of the rule, since I don't think it 
> would be too likely to have a path leading to a taint sink with an `iov` 
> pointer. That being said, let it be, but don't expect much from this rule :D
On second thought, this seems pointless indeed, removed.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:582-583
+
+  {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"fnmatch"}, TR::Prop({{0}}, {{ReturnValueIndex}})},

steakhal wrote:
> These should be sorted, isn't it?
swapped, thx



Comment at: clang/test/Analysis/taint-generic.c:399-400
+struct iovec {
+  void *iov_base; /* Starting address */
+  size_t iov_len; /* Number of bytes to transfer */
+};

steakhal wrote:
> Please use single-line comments.
> It makes debugging test files easier in some cases.
removed



Comment at: clang/test/Analysis/taint-generic.c:408
+  size_t read = readv(fd, iov, iovcnt);
+  // FIXME: should be able to assert that iov is also tainted
+  return 1 / read; // expected-warning {{Division by a tainted value, possibly 
zero}}

steakhal wrote:
> `clang_analyzer_isTainted(*iov)`
test case removed


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120369/new/

https://reviews.llvm.org/D120369

___
cfe-commits mailin

[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-03-01 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 updated this revision to Diff 412057.
gamesh411 marked 17 inline comments as done.
gamesh411 added a comment.
Herald added a subscriber: manas.

- remove vscanf and co.
- use debug.ExprInspection for test cases
- fix semantic issues for modeled functions


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D120369/new/

https://reviews.llvm.org/D120369

Files:
  clang/docs/analyzer/checkers.rst
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -1,20 +1,26 @@
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -DFILE_IS_STRUCT \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE
@@ -24,8 +30,10 @@
 // CHECK-INVALID-FILE-SAME:that expects a valid filename instead of
 // CHECK-INVALID-FILE-SAME:'justguessit'
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \
 // RUN:   2>&1 | FileCheck -DMSG=%errc_EINVAL %s -check-prefix=CHECK-ILL-FORMED
@@ -34,8 +42,10 @@
 // CHECK-ILL-FORMED-SAME:'alpha.security.taint.TaintPropagation:Config',
 // CHECK-ILL-FORMED-SAME:that expects a valid yaml file: [[MSG]]
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
+// RUN:   -analyzer-checker=debug.ExprInspection \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \
 // RUN:   2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG
@@ -45,6 +55,10 @@
 // CHECK-INVALID-ARG-SAME:that expects an argument number for propagation
 // CHECK-INVALID-ARG-SAME:rules greater or equal to -1
 
+void clang_analyzer_isTainted_char(char);
+void clang_analyzer_isTainted_charp(char*);
+void clang_analyzer_isTainted_int(int);
+
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
 int getchar(void);
@@ -58,10 +72,15 @@
 
 #define bool _Bool
 
+#define NULL (void*)0
+
+FILE *fopen(const char *name, const char *mode);
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
 typedef __typeof(sizeof(int)) size_t;
+typedef int ssize_t;
 
 // Define string functions. Use builtin for some of them. They all default to
 // the processing in the taint checker.
@@ -352,6 +371,510 @@
   return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}}
 }
 
+int fscanf_s(FILE *stream, const char *format, ...);
+void testFscanf_s(const char *fname, int *d) {
+  FILE *f = fopen(fname, "r");
+  fscanf_s(f, "%d", d);
+  clang_analyzer_isTainted_int(*d); // expected-warning {{YES}}
+}
+
+int fread(void *buffer, size_t size, size_t count, FILE *stream);
+void testFread(const char *fname, int *buffer, size_t size, size_t count) {
+  FILE *f = fopen(fname, "r");
+  size_t read = fread(buffer, size,

[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-02-23 Thread Balázs Benics via Phabricator via cfe-commits
steakhal added a comment.

Huh, this was a long one. 😅 🚀




Comment at: clang/docs/analyzer/checkers.rst:2361
 Default propagations defined by ``GenericTaintChecker``:
-``atoi``, ``atol``, ``atoll``, ``fgetc``, ``fgetln``, ``fgets``, ``fscanf``, 
``sscanf``, ``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, 
``pread``, ``read``, ``strchr``, ``strrchr``, ``tolower``, ``toupper``
+``atoi``, ``atol``, ``atoll``, ``basename``, ``dirname``, ``fgetc``, 
``fgetln``, ``fgets``, ``fnmatch``, ``fread``, ``fscanf``, ``fscanf_s``, 
``index``, ``inflate``, ``isalnum``, ``isalpha``, ``isascii``, ``isblank``, 
``iscntrl``, ``isdigit``, ``isgraph``, ``islower``, ``isprint``, ``ispunct``, 
``isspace``, ``isupper``, ``isxdigit``, ``memchr``, ``memrchr``, ``sscanf``, 
``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, ``memcmp``, 
``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``, 
``qsort_r``, ``rawmemchr``, ``read``, ``readv``, ``recv``, ``recvfrom``, 
``rindex``, ``strcasestr``, ``strchr``, ``strchrnul``,  ``strcasecmp``, 
``strcmp``, ``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``, 
``strndupa``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, ``strstr``, 
``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``, ``toupper``, 
``ttyname``, ``ttyname_r``, ``vfscanf``, ``vfscanf``, ``wctomb``, ``wcwidth``
 

I cannot see the corresponding propagation rule.
That being said, it would be handy to mention that this is for `zlib` 
decompression and this should be probably a taint source anyway.

`vfscanf` occurs two times.

`vscanf` is not mentioned here; and there are probably a couple others like 
this.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:561
+  {{"vscanf"}, TR::Prop({{0}}, {{}, 1})},
+  {{"vfscanf"}, TR::Prop({{0}}, {{}, 2})},
+

This function has exactly 3 arguments.
I'm also puzzled how tainting `va_list` will work out. That should be modeled 
separately IMO.
This comment applies to all of the similar `va_list` accepting functions, such 
as `vscanf`, `vfscanf`, and possibly others.

That being said, I think `vscanf` is more like `scanf`, so it should be modeled 
as a taint source instead of a propagator.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:575
+  {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
+  {{"readv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+  {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},

I'm on board with marking read operations as props/sources.
Let's look at the declaration: `ssize_t readv(int fd, const struct iovec *iov, 
int iovcnt);`
I'm not sure how could the pointee of `iov` be modified by this call, as its 
`const`.
Additionally, I doubt the effectiveness of the rule, since I don't think it 
would be too likely to have a path leading to a taint sink with an `iov` 
pointer. That being said, let it be, but don't expect much from this rule :D



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:577
+  {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+  {{"recvfrom"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+

It's sad that we don't model `recvmsg`, but it would be quite difficult to 
model. I can see why you left that out.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:579-580
+
+  {{"ttyname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"ttyname_r"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
+

I'm not sure how could these be used as gadgets, but let it be.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:582-583
+
+  {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"fnmatch"}, TR::Prop({{0}}, {{ReturnValueIndex}})},

These should be sorted, isn't it?



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:584
+  {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"fnmatch"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+  {{"memchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})},

> `int fnmatch(const char *pattern, const char *string, int flags);`
>The `fnmatch()` function checks whether the string argument matches the 
>pattern argument, which is a shell wildcard pattern (see `glob(7)`).
From this //man// entry I would think that we should propagate from the 
**second** argument.



Comment at: clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp:596
+  {{"memmove"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
+  {{"memmem"}, TR::Prop({{0}}, {{ReturnValueIndex}})},
+

One could say that if `memmem` was called with a tainted //needle// and 
actually finds 

[PATCH] D120369: [analyzer] Add more propagations to Taint analysis

2022-02-22 Thread Endre Fülöp via Phabricator via cfe-commits
gamesh411 created this revision.
gamesh411 added a reviewer: steakhal.
Herald added subscribers: ASDenysPetrov, martong, dkrupp, donat.nagy, 
Szelethus, mikhail.ramalho, a.sidorin, szepet, baloghadamsoftware, xazax.hun.
Herald added a reviewer: Szelethus.
gamesh411 requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Add more functions as taint propators to GenericTaintChecker.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D120369

Files:
  clang/docs/analyzer/checkers.rst
  clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
  clang/test/Analysis/taint-generic.c

Index: clang/test/Analysis/taint-generic.c
===
--- clang/test/Analysis/taint-generic.c
+++ clang/test/Analysis/taint-generic.c
@@ -1,11 +1,13 @@
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
 // RUN:   -analyzer-checker=alpha.security.ArrayBoundV2 \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \
+// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -DFILE_IS_STRUCT \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-checker=core \
@@ -13,7 +15,8 @@
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml
 
-// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast \
+// RUN:   -Wno-incompatible-library-redeclaration -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=justguessit \
@@ -24,7 +27,8 @@
 // CHECK-INVALID-FILE-SAME:that expects a valid filename instead of
 // CHECK-INVALID-FILE-SAME:'justguessit'
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \
@@ -34,7 +38,8 @@
 // CHECK-ILL-FORMED-SAME:'alpha.security.taint.TaintPropagation:Config',
 // CHECK-ILL-FORMED-SAME:that expects a valid yaml file: [[MSG]]
 
-// RUN: not %clang_analyze_cc1 -verify %s \
+// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \
+// RUN:   -verify %s \
 // RUN:   -analyzer-checker=alpha.security.taint \
 // RUN:   -analyzer-config \
 // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \
@@ -45,6 +50,8 @@
 // CHECK-INVALID-ARG-SAME:that expects an argument number for propagation
 // CHECK-INVALID-ARG-SAME:rules greater or equal to -1
 
+extern int some_global_flag_to_branch_on;
+
 int scanf(const char *restrict format, ...);
 char *gets(char *str);
 int getchar(void);
@@ -58,6 +65,8 @@
 
 #define bool _Bool
 
+FILE *fopen(const char *name, const char *mode);
+
 int fscanf(FILE *restrict stream, const char *restrict format, ...);
 int sprintf(char *str, const char *format, ...);
 void setproctitle(const char *fmt, ...);
@@ -352,6 +361,596 @@
   return 1 / x; // expected-warning {{Division by a tainted value, possibly zero}}
 }
 
+int fscanf_s(FILE *stream, const char *format, ...);
+int testFscanf_s(const char *fname, int *d) {
+  FILE *f = fopen(fname, "r");
+  fscanf_s(f, "%d", d);
+  return 1 / *d; // expected-warning {{Division by a tainted value, possibly zero}}
+}
+
+int vscanf(const char *format, ...);
+int testVscanf(int *d) {
+  char format[10];
+  scanf("%9s", format); // fake a tainted a file descriptor
+
+  vscanf(format, &d);
+  return 1 / *d; // expected-warning {{Division by a tainted value, possibly zero}}
+}
+
+int vfscanf(FILE *stream, const char *format, ...);
+int testVfscanf(const char *fname, int *d) {
+  FILE *f = fopen(fname, "r");
+  vfscanf(f, "%d", d);
+  return 1 / *d; // expected-warning {{Division by a tainted value, possibly zero}}
+}
+
+int fread(void *buffer, size_t size, size_t count, FILE *stream);
+int testFread(const char *fname, int *buffer, size_t size, size_t count) {
+  FILE *f = fopen(fname, "r");
+  size_t read = fread(buffer, size, count, f);
+
+  if (some_global_flag_to_branch_on) // just to have 2 branches, and assert 2 division by zero messages
+return 1 / *buffer;  // expected-warning {{Div