[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-04-17 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added a comment.

Got it, thank you.
Please submit the patch for e-mail t.kuc...@samsung.com
I will prepare next reviews for the other functions.
Thank you!


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-04-13 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta updated this revision to Diff 51.
tkuchta added a comment.

Thank you, I have uploaded a new diff with "&" fixed in the test function.

How should be proceed with the other functions once strsep is done? Should I 
open another review or add the code to the current patch?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -1627,6 +1627,51 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  // taint delim bytes
+  dfsan_set_label(i_label, p_delim, strlen(p_delim));
+  // taint delim pointer
+  dfsan_set_label(j_label, _delim, sizeof(p_delim));
+  // taint the string data bytes
+  dfsan_set_label(k_label, s, 5);
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(p_s));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label),
+   dfsan_union(k_label, m_label)));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, p_s);
+#endif
+
+  // taint the remaining string's pointer
+  char **pp_s = _s;
+  char **pp_s_base = pp_s;
+  dfsan_set_label(n_label, pp_s, sizeof(pp_s));
+
+  rv = strsep(pp_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, n_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *pp_s);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *pp_s);
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -2041,6 +2086,7 @@
   test_strncmp();
   test_strncpy();
   test_strpbrk();
+  test_strsep();
   test_strrchr();
   test_strstr();
   test_strtod();
Index: compiler-rt/lib/dfsan/done_abilist.txt
===
--- compiler-rt/lib/dfsan/done_abilist.txt
+++ compiler-rt/lib/dfsan/done_abilist.txt
@@ -283,6 +283,7 @@
 fun:strpbrk=custom
 fun:strrchr=custom
 fun:strstr=custom
+fun:strsep=custom
 
 # Functions which take action based on global state, such as running a callback
 # set by a separate function.
Index: compiler-rt/lib/dfsan/dfsan_custom.cpp
===
--- compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -204,6 +204,58 @@
   return const_cast(ret);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(char **s, const char *delim,
+  dfsan_label s_label,
+  dfsan_label delim_label,
+  dfsan_label *ret_label) {
+  dfsan_label base_label = dfsan_read_label(s, sizeof(*s));
+  char *base = *s;
+  char *res = strsep(s, delim);
+  if (res != *s) {
+char *token_start = res;
+int token_length = strlen(res);
+// the delimiter byte has been set to NULL
+dfsan_set_label(0, token_start + token_length, 1);
+  }
+
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? base_label : 0;
+  } else {
+size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
+*ret_label = dfsan_union(
+dfsan_union(base_label, dfsan_read_label(base, sizeof(s_bytes_read))),
+dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
+dfsan_union(s_label, delim_label)));
+  }
+
+  return res;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strsep(
+char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin delim_origin,
+dfsan_origin *ret_origin) {
+  dfsan_origin base_origin = dfsan_read_origin_of_first_taint(s, sizeof(*s));
+  char *base = *s;
+  char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
+  if (flags().strict_data_dependencies) {
+if (res)
+  *ret_origin = base_origin;
+  } else {
+if (*ret_label) {
+  if (base_origin) {
+*ret_origin = base_origin;
+  } else {
+dfsan_origin o =
+dfsan_read_origin_of_first_taint(delim, strlen(delim) + 1);
+*ret_origin = o ? o : (s_label ? s_origin : delim_origin);
+  }
+}
+  }
+
+  return res;
+}
+
 static int dfsan_memcmp_bcmp(const void *s1, const void *s2, size_t n,
  size_t *bytes_read) {
   const char *cs1 = (const char *) s1, *cs2 = (const char *) s2;
___

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-04-03 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta updated this revision to Diff 510442.
tkuchta added a comment.

Applied the latest review comments for strsep


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -1627,6 +1627,51 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  // taint delim bytes
+  dfsan_set_label(i_label, p_delim, strlen(p_delim));
+  // taint delim pointer
+  dfsan_set_label(j_label, _delim, sizeof(_delim));
+  // taint the string data bytes
+  dfsan_set_label(k_label, s, 5);
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(_s));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label),
+   dfsan_union(k_label, m_label)));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, p_s);
+#endif
+
+  // taint the remaining string's pointer
+  char **pp_s = _s;
+  char **pp_s_base = pp_s;
+  dfsan_set_label(n_label, pp_s, sizeof(pp_s));
+
+  rv = strsep(pp_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, n_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *pp_s);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *pp_s);
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -2041,6 +2086,7 @@
   test_strncmp();
   test_strncpy();
   test_strpbrk();
+  test_strsep();
   test_strrchr();
   test_strstr();
   test_strtod();
Index: compiler-rt/lib/dfsan/done_abilist.txt
===
--- compiler-rt/lib/dfsan/done_abilist.txt
+++ compiler-rt/lib/dfsan/done_abilist.txt
@@ -283,6 +283,7 @@
 fun:strpbrk=custom
 fun:strrchr=custom
 fun:strstr=custom
+fun:strsep=custom
 
 # Functions which take action based on global state, such as running a callback
 # set by a separate function.
Index: compiler-rt/lib/dfsan/dfsan_custom.cpp
===
--- compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -204,6 +204,58 @@
   return const_cast(ret);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(char **s, const char *delim,
+  dfsan_label s_label,
+  dfsan_label delim_label,
+  dfsan_label *ret_label) {
+  dfsan_label base_label = dfsan_read_label(s, sizeof(*s));
+  char *base = *s;
+  char *res = strsep(s, delim);
+  if (res != *s) {
+char *token_start = res;
+int token_length = strlen(res);
+// the delimiter byte has been set to NULL
+dfsan_set_label(0, token_start + token_length, 1);
+  }
+
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? base_label : 0;
+  } else {
+size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
+*ret_label = dfsan_union(
+dfsan_union(base_label, dfsan_read_label(base, sizeof(s_bytes_read))),
+dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
+dfsan_union(s_label, delim_label)));
+  }
+
+  return res;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strsep(
+char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin delim_origin,
+dfsan_origin *ret_origin) {
+  dfsan_origin base_origin = dfsan_read_origin_of_first_taint(s, sizeof(*s));
+  char *base = *s;
+  char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
+  if (flags().strict_data_dependencies) {
+if (res)
+  *ret_origin = base_origin;
+  } else {
+if (*ret_label) {
+  if (base_origin) {
+*ret_origin = base_origin;
+  } else {
+dfsan_origin o =
+dfsan_read_origin_of_first_taint(delim, strlen(delim) + 1);
+*ret_origin = o ? o : (s_label ? s_origin : delim_origin);
+  }
+}
+  }
+
+  return res;
+}
+
 static int dfsan_memcmp_bcmp(const void *s1, const void *s2, size_t n,
  size_t *bytes_read) {
   const char *cs1 = (const char *) s1, *cs2 = (const char *) s2;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-31 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:221
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? dfsan_read_label(base, sizeof(base)) : 0;
+  } else {

browneee wrote:
> `base, sizeof(base)` does not make sense - the size does not correspond to 
> the pointer used.
> 
> Either
> ```
> dfsan_read_label(base, sizeof(*base))   // first byte pointed to by base
> dfsan_read_label(base, strlen(base)) // whole string pointed to by base
> dfsan_read_label(, sizeof(base))  // the base pointer
> ```
> 
> In this case I think we want the base pointer.
> 
> `dfsan_read_label(, sizeof(base))`  // the base pointer
> should be equivalent to doing
> `dfsan_label base_label = dfsan_read_label(s, sizeof(*s))`  up at the start 
> of the function, just after we declare base then use `base_label` here.
> 
> Lets go with the second option to avoid taking the address of a variable.
> 
> This is semantically equivalent to my first suggestion: 
> `dfsan_get_label(base) == dfsan_read_label(, sizeof(base)) == 
> base_label`.
> Sorry I didn't consider the other constraints (no dfsan_get_label in this 
> file because the pass is not run on this code; avoid taking address of 
> variable) and suggest this in the first place.
Thank you for your comments! Just to clarify:
If we have strict data dependencies we would like *ret_label to be related to 
the taints of the contents of the string, is that correct? Should we use the 
base_label there too? My understanding is that base_label represents a taint 
applied to the pointer, not to the data? 

In other words, would that be correct to:
1) use taints of the string data in the first case (strict dependencies) -> 
therefore no base_label there as it represents the taint of the pointer
2) use the taints of the string data + the taints of the pointer in the second 
case -> therefore using base_label there


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-31 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added a comment.

there is a strange build error which seems unrelated to my change - please let 
me know if that's an issue, I will try to rebase to newest master then


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-31 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta updated this revision to Diff 510047.
tkuchta added a comment.

Updates after the review of strsep.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -1627,6 +1627,50 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  // taint delim bytes
+  dfsan_set_label(i_label, p_delim, strlen(p_delim));
+  // taint delim pointer
+  dfsan_set_label(j_label, _delim, sizeof(_delim));
+  // taint the string data bytes
+  dfsan_set_label(k_label, s, 5);
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(_s));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, k_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);
+#else
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), k_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *s);
+#endif
+
+  // taint the remaining string's pointer
+  char **pp_s = _s;
+  char **pp_s_base = pp_s;
+  dfsan_set_label(n_label, _s, sizeof(_s));
+
+  rv = strsep(pp_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+  ASSERT_ZERO_ORIGIN(rv);
+#else
+  ASSERT_LABEL(rv, dfsan_union(i_label, dfsan_union(j_label, n_label)));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -2041,6 +2085,7 @@
   test_strncmp();
   test_strncpy();
   test_strpbrk();
+  test_strsep();
   test_strrchr();
   test_strstr();
   test_strtod();
Index: compiler-rt/lib/dfsan/done_abilist.txt
===
--- compiler-rt/lib/dfsan/done_abilist.txt
+++ compiler-rt/lib/dfsan/done_abilist.txt
@@ -283,6 +283,7 @@
 fun:strpbrk=custom
 fun:strrchr=custom
 fun:strstr=custom
+fun:strsep=custom
 
 # Functions which take action based on global state, such as running a callback
 # set by a separate function.
Index: compiler-rt/lib/dfsan/dfsan_custom.cpp
===
--- compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -204,6 +204,55 @@
   return const_cast(ret);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(char **s, const char *delim,
+  dfsan_label s_label,
+  dfsan_label delim_label,
+  dfsan_label *ret_label) {
+  char *base = *s;
+  char *res = strsep(s, delim);
+  if (res != *s) {
+char *token_start = res;
+int token_length = strlen(res);
+// the delimiter byte has been set to NULL
+dfsan_set_label(0, token_start + token_length, 1);
+  }
+
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? dfsan_read_label(base, sizeof(base)) : 0;
+  } else {
+*ret_label =
+dfsan_union(dfsan_read_label(base, sizeof(base)),
+dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
+dfsan_union(s_label, delim_label)));
+  }
+
+  return res;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strsep(
+char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin delim_origin,
+dfsan_origin *ret_origin) {
+  char *base = *s;
+  char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
+  if (flags().strict_data_dependencies) {
+if (res)
+  *ret_origin = dfsan_read_origin_of_first_taint(base, strlen(base));
+  } else {
+if (*ret_label) {
+  dfsan_origin o = dfsan_read_origin_of_first_taint(base, strlen(base));
+  if (o) {
+*ret_origin = o;
+  } else {
+o = dfsan_read_origin_of_first_taint(delim, strlen(delim) + 1);
+*ret_origin = o ? o : (s_label ? s_origin : delim_origin);
+  }
+}
+  }
+
+  return res;
+}
+
 static int dfsan_memcmp_bcmp(const void *s1, const void *s2, size_t n,
  size_t *bytes_read) {
   const char *cs1 = (const char *) s1, *cs2 = (const char *) s2;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-31 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta updated this revision to Diff 510041.
tkuchta marked an inline comment as done.
tkuchta added a comment.

Hello, I applied the review comments for strsep.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -1636,80 +1636,38 @@
   char *p_delim = delim;
 
   // taint delim bytes
-  dfsan_set_label(n_label, p_delim, strlen(p_delim));
+  dfsan_set_label(i_label, p_delim, strlen(p_delim));
   // taint delim pointer
-  dfsan_set_label(i_label, _delim, sizeof(_delim));
+  dfsan_set_label(j_label, _delim, sizeof(_delim));
+  // taint the string data bytes
+  dfsan_set_label(k_label, s, 5);
+  // taint the string pointer
+  dfsan_set_label(m_label, _s, sizeof(_s));
 
   char *rv = strsep(_s, p_delim);
   assert(rv == [0]);
 #ifdef STRICT_DATA_DEPENDENCIES
-  ASSERT_ZERO_LABEL(rv);
-  ASSERT_READ_ZERO_LABEL(rv, strlen(rv));
+  ASSERT_LABEL(rv, k_label);
+  ASSERT_READ_LABEL(rv, strlen(rv), k_label);
 #else
-  ASSERT_LABEL(rv, dfsan_union(i_label, n_label));
-  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);
+  ASSERT_LABEL(rv, dfsan_union(dfsan_union(i_label, j_label), k_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *s);
 #endif
 
-  // taint the remaining string's bytes
-  dfsan_set_label(m_label, p_s, strlen(p_s));
   // taint the remaining string's pointer
   char **pp_s = _s;
-  dfsan_set_label(j_label, _s, sizeof(_s));
+  char **pp_s_base = pp_s;
+  dfsan_set_label(n_label, _s, sizeof(_s));
 
   rv = strsep(pp_s, p_delim);
 
   assert(rv == [6]);
-#ifdef STRICT_DATA_DEPENDENCIES
-  ASSERT_READ_LABEL(rv, strlen(rv), m_label);
-  ASSERT_LABEL(rv, j_label);
-  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, pp_s);
-#else
-  ASSERT_LABEL(
-  rv, dfsan_union(j_label,
-  dfsan_union(i_label, dfsan_union(m_label, n_label;
-  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
-#endif
-
-  free(s);
-  s = strdup("Hello world/");
-  base = s;
-  free(delim);
-  delim = strdup(" /");
-  p_delim = delim;
-
-  dfsan_set_label(j_label, [0], 1);
-
-  rv = strsep(, delim);
-  assert(rv == [0]);
 #ifdef STRICT_DATA_DEPENDENCIES
   ASSERT_ZERO_LABEL(rv);
+  ASSERT_ZERO_ORIGIN(rv);
 #else
-  ASSERT_LABEL(rv, j_label);
-  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, delim[1]);
-#endif
-
-  char *ps = s;
-  pp_s = 
-  dfsan_set_label(i_label, _s, sizeof(_s));
-  dfsan_set_label(i_label, ps, strlen(ps));
-  dfsan_set_label(dfsan_union(j_label, dfsan_read_label(ps, strlen(ps))), ps,
-  strlen(ps));
-  rv = strsep(pp_s, " /");
-  assert(rv == [6]);
-#ifdef STRICT_DATA_DEPENDENCIES
-  ASSERT_LABEL(rv, i_label);
-#else
-  ASSERT_LABEL(rv, i_j_label);
-  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
-#endif
-  rv = strsep(, " /");
-  assert(strlen(rv) == 0);
-#ifdef STRICT_DATA_DEPENDENCIES
-  ASSERT_ZERO_LABEL(ps);
-#else
-  ASSERT_ZERO_LABEL(rv);
-  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, 0);
-
+  ASSERT_LABEL(rv, dfsan_union(i_label, dfsan_union(j_label, n_label)));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);
 #endif
 }
 
Index: compiler-rt/lib/dfsan/dfsan_custom.cpp
===
--- compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -218,11 +218,10 @@
   }
 
   if (flags().strict_data_dependencies) {
-*ret_label = res ? s_label : 0;
+*ret_label = res ? dfsan_read_label(base, sizeof(base)) : 0;
   } else {
-size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
 *ret_label =
-dfsan_union(dfsan_read_label(base, s_bytes_read),
+dfsan_union(dfsan_read_label(base, sizeof(base)),
 dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
 dfsan_union(s_label, delim_label)));
   }
@@ -238,11 +237,10 @@
   char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
   if (flags().strict_data_dependencies) {
 if (res)
-  *ret_origin = s_origin;
+  *ret_origin = dfsan_read_origin_of_first_taint(base, strlen(base));
   } else {
 if (*ret_label) {
-  size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
-  dfsan_origin o = dfsan_read_origin_of_first_taint(base, s_bytes_read);
+  dfsan_origin o = dfsan_read_origin_of_first_taint(base, strlen(base));
   if (o) {
 *ret_origin = o;
   } else {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-03-01 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:221
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? s_label : 0;
+  } else {

browneee wrote:
> When `res != NULL`, then `res` is derived from `*s`, not from `s`.
> 
> e.g.
> 
> ```
> *ret_label = res ? dfsan_get_label(base) : 0;
> ```
Apologies for a delay.

I came across some difficulty with using dfsan_get_label inside the 
dfsan_custom.cpp file.
It seems that including the dfsan_interface.h header there, which would be 
needed for dfsan_get_label, causes other conflicts and build errors.
Would there be another way to use that function inside dfsan_custom.cpp? 


Repository:
  rCRT Compiler Runtime

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-02-06 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta updated this revision to Diff 495268.
tkuchta added a comment.

Please find the patch for strsep only attached


Repository:
  rCRT Compiler Runtime

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -1627,6 +1627,92 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  // taint delim bytes
+  dfsan_set_label(n_label, p_delim, strlen(p_delim));
+  // taint delim pointer
+  dfsan_set_label(i_label, _delim, sizeof(_delim));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+  ASSERT_READ_ZERO_LABEL(rv, strlen(rv));
+#else
+  ASSERT_LABEL(rv, dfsan_union(i_label, n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);
+#endif
+
+  // taint the remaining string's bytes
+  dfsan_set_label(m_label, p_s, strlen(p_s));
+  // taint the remaining string's pointer
+  char **pp_s = _s;
+  dfsan_set_label(j_label, _s, sizeof(_s));
+
+  rv = strsep(pp_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_READ_LABEL(rv, strlen(rv), m_label);
+  ASSERT_LABEL(rv, j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, pp_s);
+#else
+  ASSERT_LABEL(
+  rv, dfsan_union(j_label,
+  dfsan_union(i_label, dfsan_union(m_label, n_label;
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#endif
+
+  free(s);
+  s = strdup("Hello world/");
+  base = s;
+  free(delim);
+  delim = strdup(" /");
+  p_delim = delim;
+
+  dfsan_set_label(j_label, [0], 1);
+
+  rv = strsep(, delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, delim[1]);
+#endif
+
+  char *ps = s;
+  pp_s = 
+  dfsan_set_label(i_label, _s, sizeof(_s));
+  dfsan_set_label(i_label, ps, strlen(ps));
+  dfsan_set_label(dfsan_union(j_label, dfsan_read_label(ps, strlen(ps))), ps,
+  strlen(ps));
+  rv = strsep(pp_s, " /");
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, i_label);
+#else
+  ASSERT_LABEL(rv, i_j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#endif
+  rv = strsep(, " /");
+  assert(strlen(rv) == 0);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(ps);
+#else
+  ASSERT_ZERO_LABEL(rv);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, 0);
+
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -2041,6 +2127,7 @@
   test_strncmp();
   test_strncpy();
   test_strpbrk();
+  test_strsep();
   test_strrchr();
   test_strstr();
   test_strtod();
Index: compiler-rt/lib/dfsan/done_abilist.txt
===
--- compiler-rt/lib/dfsan/done_abilist.txt
+++ compiler-rt/lib/dfsan/done_abilist.txt
@@ -283,6 +283,7 @@
 fun:strpbrk=custom
 fun:strrchr=custom
 fun:strstr=custom
+fun:strsep=custom
 
 # Functions which take action based on global state, such as running a callback
 # set by a separate function.
Index: compiler-rt/lib/dfsan/dfsan_custom.cpp
===
--- compiler-rt/lib/dfsan/dfsan_custom.cpp
+++ compiler-rt/lib/dfsan/dfsan_custom.cpp
@@ -204,6 +204,57 @@
   return const_cast(ret);
 }
 
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(char **s, const char *delim,
+  dfsan_label s_label,
+  dfsan_label delim_label,
+  dfsan_label *ret_label) {
+  char *base = *s;
+  char *res = strsep(s, delim);
+  if (res != *s) {
+char *token_start = res;
+int token_length = strlen(res);
+// the delimiter byte has been set to NULL
+dfsan_set_label(0, token_start + token_length, 1);
+  }
+
+  if (flags().strict_data_dependencies) {
+*ret_label = res ? s_label : 0;
+  } else {
+size_t s_bytes_read = (res ? strlen(res) : strlen(base)) + 1;
+*ret_label =
+dfsan_union(dfsan_read_label(base, s_bytes_read),
+dfsan_union(dfsan_read_label(delim, strlen(delim) + 1),
+dfsan_union(s_label, delim_label)));
+  }
+
+  return res;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE char *__dfso_strsep(
+char **s, const char *delim, dfsan_label s_label, dfsan_label delim_label,
+dfsan_label *ret_label, dfsan_origin s_origin, dfsan_origin delim_origin,
+dfsan_origin *ret_origin) {
+  char *base = *s;
+  char *res = __dfsw_strsep(s, delim, s_label, delim_label, ret_label);
+  if 

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-02-04 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

browneee wrote:
> tkuchta wrote:
> > browneee wrote:
> > > tkuchta wrote:
> > > > browneee wrote:
> > > > > tkuchta wrote:
> > > > > > browneee wrote:
> > > > > > > The `s_label` represents the taint label for `s` (the pointer).
> > > > > > > 
> > > > > > > This line would clobber the taint label of the pointer (`s`) with 
> > > > > > > a taint label from `s[0][0..n]`.
> > > > > > > 
> > > > > > > I think this line should be deleted.
> > > > > > Agree, s_label represents the taint associated with the **s 
> > > > > > pointer. However I am now wondering if that is the taint wich we 
> > > > > > would like to return.
> > > > > > For example, if we have
> > > > > > if (flags().strict_data_dependencies) {
> > > > > > *ret_label = res ? s_label : 0;
> > > > > > 
> > > > > > We would taint the return value with the value of the pointer, not 
> > > > > > the data. It means that if we operate on a string for which the 
> > > > > > characters are tainted, but the pointer itself isn't, we are likely 
> > > > > > going to return label 0. My understanding was that we are more 
> > > > > > concerned with the taint of the data, not the pointer, am I missing 
> > > > > > something?
> > > > > > 
> > > > > Yes, we are usually more concerned with the taint of the data, not 
> > > > > the pointer.
> > > > > 
> > > > > With strict dependencies:
> > > > > // If the input pointer is tainted, the output pointer would be 
> > > > > tainted (because it is derived from the input pointer - maybe the 
> > > > > same value).
> > > > > taint(s[0]) == dfsan_read_label(s, sizeof(s)) > taint(ret) == 
> > > > > ret_label[0]
> > > > > 
> > > > > // If the input data is tainted, the output data would be tainted 
> > > > > (because it is derived from the input data).
> > > > > taint(s[0][0]) == MEM_TO_SHADOW(s[0])[0] > taint(ret[0]) == 
> > > > > MEM_TO_SHADOW(ret)[0]
> > > > > 
> > > > > Because s[0] == ret  (or ret==null), (for the non-null case) the 
> > > > > output shadow bytes are the same bytes as input shadow bytes and so 
> > > > > these taint labels for the string data in shadow memory do not need 
> > > > > to be explicitly propagated in this function. 
> > > > > 
> > > > > I think the only case actually changing/copying string data is 
> > > > > writing a delimiter byte to NULL, which you handled.
> > > > I am working on the changes and I came across a strange behavior that I 
> > > > would like to ask about.
> > > > 
> > > > It turned out that if we do
> > > > 
> > > > char *s = " ... ";
> > > > dfsan_set_label(label, _s, sizeof(_s));
> > > > 
> > > > Then, the s_label within the handler is 0, not "label". This is 
> > > > unexpected, as we would like the pointer itself to be labeled here.
> > > > I believe this has something to do with the fact that the input string 
> > > > in strsep is a double pointer. For example this works as expected for 
> > > > the delimiter string, which is a single pointer. 
> > > > It's either I'm doing something incorrectly or there is some issue with 
> > > > propagating labels for double pointers.
> > > > Have you perhaps come across this behavior before?
> > > I'm not sure what p_s is in your example. Could you provide a more 
> > > complete example?
> > > (and maybe all in one function, not half in __dfsw_strsep and half in 
> > > another function)
> > > 
> > > Here is an example showing taint labels at different levels of 
> > > indirection:
> > > 
> > > ```
> > > #include 
> > > #include 
> > > #include 
> > > 
> > > int main(void) {
> > >   char *s = " ... ";
> > >   char **p_s = 
> > >   char ***pp_s = _s;
> > > 
> > >   dfsan_label i_label = 1 << 0;
> > >   dfsan_label j_label = 1 << 1;
> > >   dfsan_label k_label = 1 << 2;
> > >   dfsan_label m_label = 1 << 3;
> > > 
> > >   // string data
> > >   dfsan_set_label(i_label, s, strlen(s));
> > >   // pointer s
> > >   dfsan_set_label(j_label, , sizeof(s));
> > >   // pointer to pointer s
> > >   dfsan_set_label(k_label, _s, sizeof(p_s));
> > >   // pointer to pointer to pointer s
> > >   dfsan_set_label(m_label, _s, sizeof(pp_s));
> > > 
> > >   assert(pp_s[0][0] == s);
> > > 
> > >   // string data
> > >   assert(dfsan_read_label(s, strlen(s)) == i_label);
> > >   // pointer s
> > >   assert(dfsan_read_label(, sizeof(s)) == j_label);
> > >   // pointer to pointer s
> > >   assert(dfsan_read_label(_s, sizeof(p_s)) == k_label);
> > >   // pointer to pointer to pointer s
> > >   assert(dfsan_read_label(_s, sizeof(pp_s)) == m_label);
> > > 
> > >   return 0;
> > > }
> > > ```
> > Hello,
> > 
> > Thank you for the comment.
> > 
> > I should have provided a more complete example.
> > What I meant is a behavior I've found while working on the tests.
> > In the test file I have something like that:
> > 
> 

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-02-02 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

browneee wrote:
> tkuchta wrote:
> > browneee wrote:
> > > tkuchta wrote:
> > > > browneee wrote:
> > > > > The `s_label` represents the taint label for `s` (the pointer).
> > > > > 
> > > > > This line would clobber the taint label of the pointer (`s`) with a 
> > > > > taint label from `s[0][0..n]`.
> > > > > 
> > > > > I think this line should be deleted.
> > > > Agree, s_label represents the taint associated with the **s pointer. 
> > > > However I am now wondering if that is the taint wich we would like to 
> > > > return.
> > > > For example, if we have
> > > > if (flags().strict_data_dependencies) {
> > > > *ret_label = res ? s_label : 0;
> > > > 
> > > > We would taint the return value with the value of the pointer, not the 
> > > > data. It means that if we operate on a string for which the characters 
> > > > are tainted, but the pointer itself isn't, we are likely going to 
> > > > return label 0. My understanding was that we are more concerned with 
> > > > the taint of the data, not the pointer, am I missing something?
> > > > 
> > > Yes, we are usually more concerned with the taint of the data, not the 
> > > pointer.
> > > 
> > > With strict dependencies:
> > > // If the input pointer is tainted, the output pointer would be tainted 
> > > (because it is derived from the input pointer - maybe the same value).
> > > taint(s[0]) == dfsan_read_label(s, sizeof(s)) > taint(ret) == 
> > > ret_label[0]
> > > 
> > > // If the input data is tainted, the output data would be tainted 
> > > (because it is derived from the input data).
> > > taint(s[0][0]) == MEM_TO_SHADOW(s[0])[0] > taint(ret[0]) == 
> > > MEM_TO_SHADOW(ret)[0]
> > > 
> > > Because s[0] == ret  (or ret==null), (for the non-null case) the output 
> > > shadow bytes are the same bytes as input shadow bytes and so these taint 
> > > labels for the string data in shadow memory do not need to be explicitly 
> > > propagated in this function. 
> > > 
> > > I think the only case actually changing/copying string data is writing a 
> > > delimiter byte to NULL, which you handled.
> > I am working on the changes and I came across a strange behavior that I 
> > would like to ask about.
> > 
> > It turned out that if we do
> > 
> > char *s = " ... ";
> > dfsan_set_label(label, _s, sizeof(_s));
> > 
> > Then, the s_label within the handler is 0, not "label". This is unexpected, 
> > as we would like the pointer itself to be labeled here.
> > I believe this has something to do with the fact that the input string in 
> > strsep is a double pointer. For example this works as expected for the 
> > delimiter string, which is a single pointer. 
> > It's either I'm doing something incorrectly or there is some issue with 
> > propagating labels for double pointers.
> > Have you perhaps come across this behavior before?
> I'm not sure what p_s is in your example. Could you provide a more complete 
> example?
> (and maybe all in one function, not half in __dfsw_strsep and half in another 
> function)
> 
> Here is an example showing taint labels at different levels of indirection:
> 
> ```
> #include 
> #include 
> #include 
> 
> int main(void) {
>   char *s = " ... ";
>   char **p_s = 
>   char ***pp_s = _s;
> 
>   dfsan_label i_label = 1 << 0;
>   dfsan_label j_label = 1 << 1;
>   dfsan_label k_label = 1 << 2;
>   dfsan_label m_label = 1 << 3;
> 
>   // string data
>   dfsan_set_label(i_label, s, strlen(s));
>   // pointer s
>   dfsan_set_label(j_label, , sizeof(s));
>   // pointer to pointer s
>   dfsan_set_label(k_label, _s, sizeof(p_s));
>   // pointer to pointer to pointer s
>   dfsan_set_label(m_label, _s, sizeof(pp_s));
> 
>   assert(pp_s[0][0] == s);
> 
>   // string data
>   assert(dfsan_read_label(s, strlen(s)) == i_label);
>   // pointer s
>   assert(dfsan_read_label(, sizeof(s)) == j_label);
>   // pointer to pointer s
>   assert(dfsan_read_label(_s, sizeof(p_s)) == k_label);
>   // pointer to pointer to pointer s
>   assert(dfsan_read_label(_s, sizeof(pp_s)) == m_label);
> 
>   return 0;
> }
> ```
Hello,

Thank you for the comment.

I should have provided a more complete example.
What I meant is a behavior I've found while working on the tests.
In the test file I have something like that:

```
char *s = "Hello world/";
char *delim = " /";
dfsan_set_label(label, , sizeof());
char *rv = strep(, delim);
 ```

If I get this right, the line 
```
dfsan_set_label(label, , sizeof());

``` 
should have applied a taint to the `s` pointer.
However, when inside `strsep`, if we check the `s_label`, it's `0`, not `label`

```
SANITIZER_INTERFACE_ATTRIBUTE char *__dfsw_strsep(char **s, const char *delim,
  dfsan_label s_label,
   

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-30 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

browneee wrote:
> tkuchta wrote:
> > browneee wrote:
> > > The `s_label` represents the taint label for `s` (the pointer).
> > > 
> > > This line would clobber the taint label of the pointer (`s`) with a taint 
> > > label from `s[0][0..n]`.
> > > 
> > > I think this line should be deleted.
> > Agree, s_label represents the taint associated with the **s pointer. 
> > However I am now wondering if that is the taint wich we would like to 
> > return.
> > For example, if we have
> > if (flags().strict_data_dependencies) {
> > *ret_label = res ? s_label : 0;
> > 
> > We would taint the return value with the value of the pointer, not the 
> > data. It means that if we operate on a string for which the characters are 
> > tainted, but the pointer itself isn't, we are likely going to return label 
> > 0. My understanding was that we are more concerned with the taint of the 
> > data, not the pointer, am I missing something?
> > 
> Yes, we are usually more concerned with the taint of the data, not the 
> pointer.
> 
> With strict dependencies:
> // If the input pointer is tainted, the output pointer would be tainted 
> (because it is derived from the input pointer - maybe the same value).
> taint(s[0]) == dfsan_read_label(s, sizeof(s)) > taint(ret) == ret_label[0]
> 
> // If the input data is tainted, the output data would be tainted (because it 
> is derived from the input data).
> taint(s[0][0]) == MEM_TO_SHADOW(s[0])[0] > taint(ret[0]) == 
> MEM_TO_SHADOW(ret)[0]
> 
> Because s[0] == ret  (or ret==null), (for the non-null case) the output 
> shadow bytes are the same bytes as input shadow bytes and so these taint 
> labels for the string data in shadow memory do not need to be explicitly 
> propagated in this function. 
> 
> I think the only case actually changing/copying string data is writing a 
> delimiter byte to NULL, which you handled.
I am working on the changes and I came across a strange behavior that I would 
like to ask about.

It turned out that if we do

char *s = " ... ";
dfsan_set_label(label, _s, sizeof(_s));

Then, the s_label within the handler is 0, not "label". This is unexpected, as 
we would like the pointer itself to be labeled here.
I believe this has something to do with the fact that the input string in 
strsep is a double pointer. For example this works as expected for the 
delimiter string, which is a single pointer. 
It's either I'm doing something incorrectly or there is some issue with 
propagating labels for double pointers.
Have you perhaps come across this behavior before?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-16 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added inline comments.



Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:213
+  char *res = strsep(s, delim);
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {

browneee wrote:
> The `s_label` represents the taint label for `s` (the pointer).
> 
> This line would clobber the taint label of the pointer (`s`) with a taint 
> label from `s[0][0..n]`.
> 
> I think this line should be deleted.
Agree, s_label represents the taint associated with the **s pointer. However I 
am now wondering if that is the taint wich we would like to return.
For example, if we have
if (flags().strict_data_dependencies) {
*ret_label = res ? s_label : 0;

We would taint the return value with the value of the pointer, not the data. It 
means that if we operate on a string for which the characters are tainted, but 
the pointer itself isn't, we are likely going to return label 0. My 
understanding was that we are more concerned with the taint of the data, not 
the pointer, am I missing something?




Comment at: compiler-rt/lib/dfsan/dfsan_custom.cpp:214
+  s_label = dfsan_read_label(base, strlen(base));
+  if (res && (res != base)) {
+char *token_start = res;

browneee wrote:
> I think `res && (res != base)` is never true.
> 
> Checking an implementation of strsep 
> (http://git.musl-libc.org/cgit/musl/tree/src/string/strsep.c)
> `res` can either be `NULL` or the same value as `base`.
> 
> I think this line should be `(res != *s)`. This is different because `*s` may 
> be changed by the call to `strsep` just above.
good point, my bad


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-11 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta added a comment.

Hello browneee,
Thank you very much for the comments! I will take a closer look at the issue 
you pointed out.
Do you prefer me to send a couple of diffs (one per each function) at once or 
one by one?

Kind regards,


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-10 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta updated this revision to Diff 487936.
tkuchta added a comment.

applied newer version of clang-format


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D141389/new/

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -364,6 +364,47 @@
   ASSERT_LABEL(dst[11], j_label);
 }
 
+void test_strncat() {
+  char src[] = "world";
+  int volatile x = 0; // buffer to ensure src and dst do not share origins
+  (void)x;
+  char dst[] = "hello \0";
+  int volatile y = 0; // buffer to ensure dst and p do not share origins
+  (void)y;
+  char *p = dst;
+  dfsan_set_label(k_label, , sizeof(p));
+  dfsan_set_label(i_label, src, sizeof(src));
+  dfsan_set_label(j_label, dst, sizeof(dst));
+  dfsan_origin dst_o = dfsan_get_origin((long)dst[0]);
+  (void)dst_o;
+  char *ret = strncat(p, src, strlen(src));
+  ASSERT_LABEL(ret, k_label);
+  ASSERT_EQ_ORIGIN(ret, p);
+  assert(ret == dst);
+  assert(strcmp(src, dst + 6) == 0);
+  // Origins are assigned for every 4 contiguous 4-aligned bytes. After
+  // appending src to dst, origins of src can overwrite origins of dst if their
+  // application adddresses are within [start_aligned_down, end_aligned_up).
+  // Other origins are not changed.
+  char *start_aligned_down = (char *)(((size_t)(dst + 6)) & ~3UL);
+  char *end_aligned_up = (char *)(((size_t)(dst + 11 + 4)) & ~3UL);
+  for (int i = 0; i < 12; ++i) {
+if (dst + i < start_aligned_down || dst + i >= end_aligned_up) {
+  ASSERT_INIT_ORIGIN([i], dst_o);
+} else {
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN([i], src[0]);
+}
+  }
+  for (int i = 0; i < 6; ++i) {
+ASSERT_LABEL(dst[i], j_label);
+  }
+  for (int i = 6; i < strlen(dst); ++i) {
+ASSERT_LABEL(dst[i], i_label);
+assert(dfsan_get_label(dst[i]) == dfsan_get_label(src[i - 6]));
+  }
+  ASSERT_LABEL(dst[11], j_label);
+}
+
 void test_strlen() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -378,6 +419,20 @@
 #endif
 }
 
+void test_strnlen() {
+  char str1[] = "str1";
+  dfsan_set_label(i_label, [3], 1);
+
+  int rv = strnlen(str1, 4);
+  assert(rv == 4);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, i_label);
+  ASSERT_EQ_ORIGIN(rv, str1[3]);
+#endif
+}
+
 void test_strdup() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -1627,6 +1682,77 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  dfsan_set_label(n_label, p_delim, sizeof(p_delim));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, n_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);
+#endif
+
+  dfsan_set_label(m_label, p_s, sizeof(p_s));
+  rv = strsep(_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#else
+  ASSERT_LABEL(rv, dfsan_union(m_label, n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#endif
+
+  free(s);
+  s = strdup("Hello world/");
+  base = s;
+  free(delim);
+  delim = strdup(" /");
+  p_delim = delim;
+  dfsan_set_label(i_label, [7], 1);
+  dfsan_set_label(j_label, [0], 1);
+
+  rv = strsep(, delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, delim[1]);
+#endif
+
+  char *ps = s;
+  dfsan_set_label(dfsan_union(j_label, dfsan_read_label(ps, strlen(ps))), ps,
+  strlen(ps));
+  rv = strsep(, " /");
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, i_j_label);
+#else
+  ASSERT_LABEL(rv, i_j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#endif
+  rv = strsep(, " /");
+  assert(strlen(rv) == 0);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(ps);
+#else
+  ASSERT_ZERO_LABEL(rv);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, 0);
+
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -1968,6 +2094,138 @@
   ASSERT_LABEL(r, 0);
 }
 
+template 
+void test_sscanf_chunk(T expected, const char *format, const char *input) {
+  char padded_input[512];
+  strcpy(padded_input, "foo ");
+  strcat(padded_input, input);
+  strcat(padded_input, " bar");
+
+  char padded_format[512];
+  strcpy(padded_format, "foo ");
+  strcat(padded_format, format);
+  strcat(padded_format, " bar");
+
+  // Non labelled arg.
+  T arg;
+  memset(, 0, sizeof(arg));
+  sscanf(padded_input, padded_format, );
+  assert(arg == expected);
+  ASSERT_READ_LABEL(, sizeof(arg), 0);
+
+  // Labelled arg.
+  memset(, 0, 

[PATCH] D141389: [DFSAN] Add support for strnlen, strncat, strsep, sscanf and _tolower

2023-01-10 Thread Tomasz Kuchta via Phabricator via cfe-commits
tkuchta created this revision.
tkuchta added reviewers: vitalybuka, glider.
tkuchta added a project: Sanitizers.
Herald added a subscriber: Enna1.
Herald added a project: All.
tkuchta requested review of this revision.

This patch adds support for the following libc functions in DFSAN: strnlen, 
strncat, strsep and sscanf. It further enables _tolower via the options file.


Repository:
  rCRT Compiler Runtime

https://reviews.llvm.org/D141389

Files:
  compiler-rt/lib/dfsan/dfsan_custom.cpp
  compiler-rt/lib/dfsan/done_abilist.txt
  compiler-rt/test/dfsan/custom.cpp

Index: compiler-rt/test/dfsan/custom.cpp
===
--- compiler-rt/test/dfsan/custom.cpp
+++ compiler-rt/test/dfsan/custom.cpp
@@ -364,6 +364,47 @@
   ASSERT_LABEL(dst[11], j_label);
 }
 
+void test_strncat() {
+  char src[] = "world";
+  int volatile x = 0; // buffer to ensure src and dst do not share origins
+  (void)x;
+  char dst[] = "hello \0";
+  int volatile y = 0; // buffer to ensure dst and p do not share origins
+  (void)y;
+  char *p = dst;
+  dfsan_set_label(k_label, , sizeof(p));
+  dfsan_set_label(i_label, src, sizeof(src));
+  dfsan_set_label(j_label, dst, sizeof(dst));
+  dfsan_origin dst_o = dfsan_get_origin((long)dst[0]);
+  (void)dst_o;
+  char *ret = strncat(p, src, strlen(src));
+  ASSERT_LABEL(ret, k_label);
+  ASSERT_EQ_ORIGIN(ret, p);
+  assert(ret == dst);
+  assert(strcmp(src, dst + 6) == 0);
+  // Origins are assigned for every 4 contiguous 4-aligned bytes. After
+  // appending src to dst, origins of src can overwrite origins of dst if their
+  // application adddresses are within [start_aligned_down, end_aligned_up).
+  // Other origins are not changed.
+  char *start_aligned_down = (char *)(((size_t)(dst + 6)) & ~3UL);
+  char *end_aligned_up = (char *)(((size_t)(dst + 11 + 4)) & ~3UL);
+  for (int i = 0; i < 12; ++i) {
+if (dst + i < start_aligned_down || dst + i >= end_aligned_up) {
+  ASSERT_INIT_ORIGIN([i], dst_o);
+} else {
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN([i], src[0]);
+}
+  }
+  for (int i = 0; i < 6; ++i) {
+ASSERT_LABEL(dst[i], j_label);
+  }
+  for (int i = 6; i < strlen(dst); ++i) {
+ASSERT_LABEL(dst[i], i_label);
+assert(dfsan_get_label(dst[i]) == dfsan_get_label(src[i - 6]));
+  }
+  ASSERT_LABEL(dst[11], j_label);
+}
+
 void test_strlen() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -378,6 +419,20 @@
 #endif
 }
 
+void test_strnlen() {
+  char str1[] = "str1";
+  dfsan_set_label(i_label, [3], 1);
+
+  int rv = strnlen(str1, 4);
+  assert(rv == 4);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, i_label);
+  ASSERT_EQ_ORIGIN(rv, str1[3]);
+#endif
+}
+
 void test_strdup() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -1627,6 +1682,77 @@
 #endif
 }
 
+void test_strsep() {
+  char *s = strdup("Hello world/");
+  char *delim = strdup(" /");
+
+  char *p_s = s;
+  char *base = s;
+  char *p_delim = delim;
+
+  dfsan_set_label(n_label, p_delim, sizeof(p_delim));
+
+  char *rv = strsep(_s, p_delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, n_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, *p_delim);
+#endif
+
+  dfsan_set_label(m_label, p_s, sizeof(p_s));
+  rv = strsep(_s, p_delim);
+
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, m_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#else
+  ASSERT_LABEL(rv, dfsan_union(m_label, n_label));
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#endif
+
+  free(s);
+  s = strdup("Hello world/");
+  base = s;
+  free(delim);
+  delim = strdup(" /");
+  p_delim = delim;
+  dfsan_set_label(i_label, [7], 1);
+  dfsan_set_label(j_label, [0], 1);
+
+  rv = strsep(, delim);
+  assert(rv == [0]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(rv);
+#else
+  ASSERT_LABEL(rv, j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, delim[1]);
+#endif
+
+  char *ps = s;
+  dfsan_set_label(dfsan_union(j_label, dfsan_read_label(ps, strlen(ps))), ps,
+  strlen(ps));
+  rv = strsep(, " /");
+  assert(rv == [6]);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_LABEL(rv, i_j_label);
+#else
+  ASSERT_LABEL(rv, i_j_label);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, base[6]);
+#endif
+  rv = strsep(, " /");
+  assert(strlen(rv) == 0);
+#ifdef STRICT_DATA_DEPENDENCIES
+  ASSERT_ZERO_LABEL(ps);
+#else
+  ASSERT_ZERO_LABEL(rv);
+  ASSERT_INIT_ORIGIN_EQ_ORIGIN(, 0);
+
+#endif
+}
+
 void test_memchr() {
   char str1[] = "str1";
   dfsan_set_label(i_label, [3], 1);
@@ -1968,6 +2094,138 @@
   ASSERT_LABEL(r, 0);
 }
 
+template 
+void test_sscanf_chunk(T expected, const char *format, const char *input) {
+  char padded_input[512];
+  strcpy(padded_input, "foo ");
+  strcat(padded_input, input);
+  strcat(padded_input, " bar");
+
+  char padded_format[512];
+  strcpy(padded_format, "foo ");
+  strcat(padded_format, format);
+