Bootstrap and regression tested on x86_64. A ubsan-bootstrap for C and C++ works too (although there are other unrelated runtime errors).
I tried a bootstrap with -fsanitize=bounds-strict and there are indeed many additional runtime errors due the struct hack with last array element of size 1 (or even 2). So Jakub and Richard were right and putting this into a separate option was the right thing to do. Code-wise it seems to come just from a couple of places such as (rtl.h, tree.h, vec.h, sbitmap.h, ...). Just to see how hard it would be to make this work with strict bounds checking, I started doing this in my tree using a macro: #define CAST_TO_POINTER(x) ((__typeof(x[0])*)(x)) For example, #define RTL_CHECK1(RTX, N, C1) ((RTX)->u.fld[N]) then becomes #define RTL_CHECK1(RTX, N, C1) \ (CAST_TO_POINTER((RTX)->u.fld)[N]) Doing this in a few places gets rid of most of the errors rather quickly (getting rid of all errors would be a bit more work). If one allows VLAs one could also cast to an array of the correct length: #defined CAST_TO_VLA(x, len) (*(__typeof(x[0])(*)[len])&x) For example: #define RTL_CHECK1(RTX, N, C1) \ (CAST_TO_VLA((RTX)->u.fld, GET_RTX_LENGTH( ... ))[N]) and have ubsan auto-generate the bound checking which are now often manually added. Martin Martin Uecker <uec...@eecs.berkeley.edu>: > > I tested Marek's proposed change and it works correctly, > i.e. arrays which are not part of a struct are now > instrumented when accessed through a pointer. This also > means that the following case is diagnosed (correctly) > as undefined behaviour as pointed out by Richard: > > int > main (void) > { > int *t = (int *) __builtin_malloc (sizeof (int) * 9); > int (*a)[3][3] = (int (*)[3][3])t; > (*a)[0][9] = 1; > } > > > I also wanted arrays which are the last elements of a > struct which are not flexible-array members instrumented > correctly. So I added -fsantitize=bounds-strict which does > this. It seems to do instrumentation similar to clang > with -fsanitize=bounds. > > Comments? > > (regression testing in progress, but ubsan-related > tests all pass) > > diff --git a/gcc/ChangeLog b/gcc/ChangeLog > index ec2cb69..cb6df20 100644 > --- a/gcc/ChangeLog > +++ b/gcc/ChangeLog > @@ -1,3 +1,11 @@ > +2015-02-27 Martin Uecker <uec...@eecs.berkeley.edu> > + > + * opts.c(common_handle_option): Add option for > + -fsanitize=bounds-strict > + * flag-types.h: Add SANITIZE_BOUNDS_STRICT > + * doc/invoke.texi: Improve description for > + -fsanitize=bounds and document -fsanitize=bounds-strict > + > diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog > index ffa01c6..44a1761 100644 > --- a/gcc/c-family/ChangeLog > +++ b/gcc/c-family/ChangeLog > @@ -1,3 +1,9 @@ > +2015-02-27 Martin Uecker <uec...@eecs.berkeley.edu> > + > + * c-ubsan.c (ubsan_instrument_bounds): Instrument > + arrays which are accessed directly through a pointer. > + For strict checking, instrument last elements of a struct. > + > diff --git a/gcc/c-family/c-ubsan.c b/gcc/c-family/c-ubsan.c > index 90d59c0..1a0e2da 100644 > --- a/gcc/c-family/c-ubsan.c > +++ b/gcc/c-family/c-ubsan.c > @@ -293,6 +293,7 @@ ubsan_instrument_bounds (location_t loc, tree array, tree > *index, > tree type = TREE_TYPE (array); > tree domain = TYPE_DOMAIN (type); > > + /* This also takes care of flexible array members. */ > if (domain == NULL_TREE || TYPE_MAX_VALUE (domain) == NULL_TREE) > return NULL_TREE; > > @@ -301,10 +302,13 @@ ubsan_instrument_bounds (location_t loc, tree array, > tree *index, > bound = fold_build2 (PLUS_EXPR, TREE_TYPE (bound), bound, > build_int_cst (TREE_TYPE (bound), 1)); > > - /* Detect flexible array members and suchlike. */ > + /* Don't instrument arrays which are the last element of > + a struct. */ > tree base = get_base_address (array); > - if (base && (TREE_CODE (base) == INDIRECT_REF > - || TREE_CODE (base) == MEM_REF)) > + if (!(flag_sanitize & SANITIZE_BOUNDS_STRICT) > + && (TREE_CODE (array) == COMPONENT_REF) > + && base && (TREE_CODE (base) == INDIRECT_REF > + || TREE_CODE (base) == MEM_REF)) > { > tree next = NULL_TREE; > tree cref = array; > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > index ef4cc75..5a93757 100644 > --- a/gcc/doc/invoke.texi > +++ b/gcc/doc/invoke.texi > @@ -5704,8 +5704,15 @@ a++; > @item -fsanitize=bounds > @opindex fsanitize=bounds > This option enables instrumentation of array bounds. Various out of bounds > -accesses are detected. Flexible array members and initializers of variables > -with static storage are not instrumented. > +accesses are detected. Accesses to arrays which are the last member of a > +struct and initializers of variables with static storage are not > instrumented. > + > +@item -fsanitize=bounds-strict > +@opindex fsanitize=bounds-strict > +This option enables strict instrumentation of array bounds. Most out of > bounds > +accesses are detected including accesses to arrays which are the last member > of a > +struct. Initializers of variables with static storage are not instrumented. > + > > @item -fsanitize=alignment > @opindex fsanitize=alignment > diff --git a/gcc/flag-types.h b/gcc/flag-types.h > index bfdce44..c9ad4df 100644 > --- a/gcc/flag-types.h > +++ b/gcc/flag-types.h > @@ -238,6 +238,7 @@ enum sanitize_code { > SANITIZE_RETURNS_NONNULL_ATTRIBUTE = 1UL << 19, > SANITIZE_OBJECT_SIZE = 1UL << 20, > SANITIZE_VPTR = 1UL << 21, > + SANITIZE_BOUNDS_STRICT = 1UL << 22, > SANITIZE_UNDEFINED = SANITIZE_SHIFT | SANITIZE_DIVIDE | > SANITIZE_UNREACHABLE > | SANITIZE_VLA | SANITIZE_NULL | SANITIZE_RETURN > | SANITIZE_SI_OVERFLOW | SANITIZE_BOOL | SANITIZE_ENUM > @@ -245,7 +246,7 @@ enum sanitize_code { > | SANITIZE_NONNULL_ATTRIBUTE > | SANITIZE_RETURNS_NONNULL_ATTRIBUTE > | SANITIZE_OBJECT_SIZE | SANITIZE_VPTR, > - SANITIZE_NONDEFAULT = SANITIZE_FLOAT_DIVIDE | SANITIZE_FLOAT_CAST > + SANITIZE_NONDEFAULT = SANITIZE_FLOAT_DIVIDE | SANITIZE_FLOAT_CAST | > SANITIZE_BOUNDS_STRICT > }; > > /* flag_vtable_verify initialization levels. */ > diff --git a/gcc/opts.c b/gcc/opts.c > index 39c190d..7fe77fa 100644 > --- a/gcc/opts.c > +++ b/gcc/opts.c > @@ -1584,6 +1584,7 @@ common_handle_option (struct gcc_options *opts, > { "float-cast-overflow", SANITIZE_FLOAT_CAST, > sizeof "float-cast-overflow" - 1 }, > { "bounds", SANITIZE_BOUNDS, sizeof "bounds" - 1 }, > + { "bounds-strict", SANITIZE_BOUNDS | SANITIZE_BOUNDS_STRICT, > sizeof "bounds-strict" - 1 }, > { "alignment", SANITIZE_ALIGNMENT, sizeof "alignment" - 1 }, > { "nonnull-attribute", SANITIZE_NONNULL_ATTRIBUTE, > sizeof "nonnull-attribute" - 1 }, > diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog > index 28db896..c46cbe4 100644 > --- a/gcc/testsuite/ChangeLog > +++ b/gcc/testsuite/ChangeLog > @@ -1,3 +1,11 @@ > +2015-02-27 Martin Uecker <uec...@eecs.berkeley.edu> > + > + * gcc.dg/ubsan/bounds-2.c: New > + * c-c++-common/ubsan/bounds-1.c: Add case for non-flexible > + array member which is the last element of a struct. > + * c-c++-common/ubsan/bounds-8.c: New > + * c-c++-common/ubsan/bounds-9.c: New > + > 2015-02-25 Pat Haugen <pthau...@us.ibm.com> > > * gcc.target/powerpc/direct-move.h: Include string.h/stdlib.h. > diff --git a/gcc/testsuite/c-c++-common/ubsan/bounds-1.c > b/gcc/testsuite/c-c++-common/ubsan/bounds-1.c > index 20e390f..5014f6f 100644 > --- a/gcc/testsuite/c-c++-common/ubsan/bounds-1.c > +++ b/gcc/testsuite/c-c++-common/ubsan/bounds-1.c > @@ -6,6 +6,7 @@ > struct S { int a[10]; }; > struct T { int l; int a[]; }; > struct U { int l; int a[0]; }; > +struct V { int l; int a[1]; }; > > __attribute__ ((noinline, noclone)) > void > @@ -64,9 +65,14 @@ main (void) > struct T *t = (struct T *) __builtin_malloc (sizeof (struct T) + 10); > t->a[1] = 1; > > + /* Don't instrument zero-sized arrays (GNU extension). */ > struct U *u = (struct U *) __builtin_malloc (sizeof (struct U) + 10); > u->a[1] = 1; > > + /* Don't instrument last array in a struct. */ > + struct V *v = (struct V *) __builtin_malloc (sizeof (struct V) + 10); > + v->a[1] = 1; > + > long int *d[10][5]; > d[9][0] = (long int *) 0; > d[8][3] = d[9][0]; > diff --git a/gcc/testsuite/c-c++-common/ubsan/bounds-8.c > b/gcc/testsuite/c-c++-common/ubsan/bounds-8.c > new file mode 100644 > index 0000000..e84e42e > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/ubsan/bounds-8.c > @@ -0,0 +1,21 @@ > +/* { dg-do run } */ > +/* { dg-options "-fsanitize=bounds" } */ > +/* Origin: Martin Uecker <uec...@eecs.berkeley.edu> */ > + > +void foo(volatile int (*a)[3]) > +{ > + (*a)[3] = 1; // error > + a[0][0] = 1; // ok > + a[1][0] = 1; // ok > + a[1][4] = 1; // error > +} > + > +int main() > +{ > + volatile int a[20]; > + foo((int (*)[3])&a); > + return 0; > +} > + > +/* { dg-output "index 3 out of bounds for type 'int > \\\[3\\\]'\[^\n\r]*(\n|\r\n|\r)" } */ > +/* { dg-output "\[^\n\r]*index 4 out of bounds for type 'int \\\[3\\\]'" } */ > diff --git a/gcc/testsuite/c-c++-common/ubsan/bounds-9.c > b/gcc/testsuite/c-c++-common/ubsan/bounds-9.c > new file mode 100644 > index 0000000..37edc1b > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/ubsan/bounds-9.c > @@ -0,0 +1,16 @@ > +/* { dg-do run } */ > +/* { dg-options "-fsanitize=bounds-strict -Wall" } */ > + > +struct V { int l; int a[1]; }; > + > +int > +main (void) > +{ > + /* For strict, do instrument last array in a struct. */ > + struct V *v = (struct V *) __builtin_malloc (sizeof (struct V) + 10); > + v->a[1] = 1; > + > + return 0; > +} > + > +/* { dg-output "index 1 out of bounds for type 'int \\\[1\\\]'" } */ > diff --git a/gcc/testsuite/gcc.dg/ubsan/bounds-2.c > b/gcc/testsuite/gcc.dg/ubsan/bounds-2.c > new file mode 100644 > index 0000000..096a772 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/ubsan/bounds-2.c > @@ -0,0 +1,17 @@ > +/* { dg-do run } */ > +/* { dg-options "-fsanitize=bounds" } */ > +/* Origin: Martin Uecker <uec...@eecs.berkeley.edu> */ > + > +void foo(int n, volatile int (*b)[n]) > +{ > + (*b)[n] = 1; // error > +} > + > +int main() > +{ > + volatile int a[20]; > + foo(3, (int (*)[3])&a); > + return 0; > +} > + > +/* { dg-output "index 3 out of bounds for type 'int \\\[\\\*\\\]'" } */ > >