Thanks for the reviews, here is another version. I haven't touched the division by zero instrumentation, but the shift instrumentation is revamped; what it should instrument now is, as Jakub wrote: 1) if ((unsigned) y > precm1) ub plus for signed x << y: 2) for C99/C11 if ((unsigned) x >> (precm1 - y)) ub 3) for C++11/C++14 if (x < 0 || ((unsigned) x >> (precm1 - y)) > 1) ub Is there anything left to do for shifts?
The c-ubsan.c now resides in c-family/. I also fixed the ICE with long/long long type (the resulting type of a RSHIFT_EXPR should really be that of the left operand). But I have no clue how it behaves with e.g. constexpr specifier. When/if this is ok enough, I think we could put this on a branch, together with Jakub's patches from http://gcc.gnu.org/ml/gcc-patches/2013-06/msg00291.html The next part will be to construct arguments for the ubsan library; hopefully I can (mis)use parts of asan.c code. Then the command-line parsing (parse options into the flag_sanitize bitmask). And after this is done, I'd say we should add a testsuite, ideally something in c-c++-common/ and something in g++.dg/ (templates and other stuff). Regtested/bootstrapped on x86_64-linux. 2013-06-07 Marek Polacek <pola...@redhat.com> * Makefile.in: Add ubsan.c. * common.opt: Add -fsanitize=undefined option. * doc/invoke.texi: Document the new flag. * sanitizer.def (DEF_SANITIZER_BUILTIN): Define. * builtin-attrs.def (ATTR_COLD): Define. * asan.c (initialize_sanitizer_builtins): Build BT_FN_VOID_PTR_PTR_PTR. * builtins.def (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW, BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS): Define. c-family/ * c-ubsan.c: New file. * c-ubsan.h: New file. cp/ * typeck.c (cp_build_binary_op): Add division by zero and shift instrumentation. c/ * c-typeck.c (build_binary_op): Add division by zero and shift instrumentation. --- gcc/c-family/c-ubsan.c.mp 2013-06-07 22:58:18.084990548 +0200 +++ gcc/c-family/c-ubsan.c 2013-06-07 22:29:34.677588474 +0200 @@ -0,0 +1,120 @@ +/* UndefinedBehaviorSanitizer, undefined behavior detector. + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Marek Polacek <pola...@redhat.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tree.h" +#include "c-family/c-common.h" +#include "c-family/c-ubsan.h" + +/* Instrument division by zero and INT_MIN / -1. */ + +tree +ubsan_instrument_division (location_t loc, enum tree_code code, + tree op0, tree op1) +{ + tree t, tt; + tree orig = build2 (code, TREE_TYPE (op0), op0, op1); + + if (TREE_CODE (TREE_TYPE (op0)) != INTEGER_TYPE + || TREE_CODE (TREE_TYPE (op1)) != INTEGER_TYPE) + return orig; + + /* If we *know* that the divisor is not -1 or 0, we don't have to + instrument this expression. + ??? We could use decl_constant_value to cover up more cases. */ + if (TREE_CODE (op1) == INTEGER_CST + && integer_nonzerop (op1) + && !integer_minus_onep (op1)) + return orig; + + tt = fold_build2 (EQ_EXPR, boolean_type_node, op1, + integer_minus_one_node); + t = fold_build2 (EQ_EXPR, boolean_type_node, op0, + TYPE_MIN_VALUE (TREE_TYPE (op0))); + t = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, t, tt); + tt = build2 (EQ_EXPR, boolean_type_node, + op1, integer_zero_node); + t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, tt, t); + tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW); + tt = build_call_expr_loc (loc, tt, 0); + t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node); + t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (orig), t, orig); + + return t; +} + +/* Instrument left and right shifts. */ + +tree +ubsan_instrument_shift (location_t loc, enum tree_code code, + tree op0, tree op1) +{ + tree t, tt = NULL_TREE; + tree orig = build2 (code, TREE_TYPE (op0), op0, op1); + tree uprecm1 = build_int_cst (unsigned_type_for (TREE_TYPE (op1)), + TYPE_PRECISION (TREE_TYPE (op0)) - 1); + tree precm1 = build_int_cst (TREE_TYPE (op1), + TYPE_PRECISION (TREE_TYPE (op0)) - 1); + + t = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op1)), op1); + t = fold_build2 (GT_EXPR, boolean_type_node, t, uprecm1); + + /* For signed x << y, in C99/C11, the following: + (unsigned) x >> (precm1 - y) + if non-zero, is undefined. */ + if (code == LSHIFT_EXPR + && !TYPE_UNSIGNED (TREE_TYPE (op0)) + && (flag_isoc99 || flag_isoc11)) + { + tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1); + tt = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op0)), op0); + tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x); + tt = fold_build2 (NE_EXPR, boolean_type_node, tt, + build_int_cst (TREE_TYPE (tt), 0)); + } + + /* For signed x << y, in C++11/C++14, the following: + x < 0 || ((unsigned) x >> (precm1 - y)) + if > 1, is undefined. */ + if (code == LSHIFT_EXPR + && !TYPE_UNSIGNED (TREE_TYPE (op0)) + && (cxx_dialect == cxx11 || cxx_dialect == cxx1y)) + { + tree x = fold_build2 (MINUS_EXPR, integer_type_node, precm1, op1); + tt = fold_convert_loc (loc, unsigned_type_for (TREE_TYPE (op0)), op0); + tt = fold_build2 (RSHIFT_EXPR, TREE_TYPE (tt), tt, x); + tt = fold_build2 (GT_EXPR, boolean_type_node, tt, + build_int_cst (TREE_TYPE (tt), 1)); + x = fold_build2 (LT_EXPR, boolean_type_node, op0, + build_int_cst (TREE_TYPE (op0), 0)); + tt = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, x, tt); + } + + t = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, t, + tt ? tt : integer_zero_node); + tt = builtin_decl_explicit (BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS); + tt = build_call_expr_loc (loc, tt, 0); + t = fold_build3 (COND_EXPR, void_type_node, t, tt, void_zero_node); + t = fold_build2 (COMPOUND_EXPR, TREE_TYPE (orig), t, orig); + + return t; +} --- gcc/c-family/c-ubsan.h.mp 2013-06-07 22:58:23.009004841 +0200 +++ gcc/c-family/c-ubsan.h 2013-06-05 18:10:21.284693807 +0200 @@ -0,0 +1,27 @@ +/* UndefinedBehaviorSanitizer, undefined behavior detector. + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Marek Polacek <pola...@redhat.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef GCC_UBSAN_H +#define GCC_UBSAN_H + +extern tree ubsan_instrument_division (location_t, enum tree_code, tree, tree); +extern tree ubsan_instrument_shift (location_t, enum tree_code, tree, tree); + +#endif /* GCC_UBSAN_H */ --- gcc/sanitizer.def.mp 2013-06-07 23:01:16.783536178 +0200 +++ gcc/sanitizer.def 2013-06-07 23:04:47.646161427 +0200 @@ -283,3 +283,13 @@ DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_ATOM DEF_SANITIZER_BUILTIN(BUILT_IN_TSAN_ATOMIC_SIGNAL_FENCE, "__tsan_atomic_signal_fence", BT_FN_VOID_INT, ATTR_NOTHROW_LEAF_LIST) + +/* Undefined Behavior Sanitizer */ +DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HANDLE_DIVREM_OVERFLOW, + "__ubsan_handle_divrem_overflow", + BT_FN_VOID_PTR_PTR_PTR, + ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST) +DEF_SANITIZER_BUILTIN(BUILT_IN_UBSAN_HANDLE_SHIFT_OUT_OF_BOUNDS, + "__ubsan_handle_shift_out_of_bounds", + BT_FN_VOID_PTR_PTR_PTR, + ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST) --- gcc/builtins.def.mp 2013-06-07 23:01:16.775536150 +0200 +++ gcc/builtins.def 2013-06-07 23:04:47.611161301 +0200 @@ -155,7 +155,7 @@ along with GCC; see the file COPYING3. #define DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ DEF_BUILTIN (ENUM, "__builtin_" NAME, BUILT_IN_NORMAL, TYPE, TYPE, \ true, true, true, ATTRS, true, \ - (flag_asan || flag_tsan)) + (flag_asan || flag_tsan || flag_ubsan)) #undef DEF_CILKPLUS_BUILTIN #define DEF_CILKPLUS_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ --- gcc/Makefile.in.mp 2013-06-07 23:01:16.771536135 +0200 +++ gcc/Makefile.in 2013-06-07 23:04:47.602161268 +0200 @@ -1150,7 +1150,7 @@ C_COMMON_OBJS = c-family/c-common.o c-fa c-family/c-omp.o c-family/c-opts.o c-family/c-pch.o \ c-family/c-ppoutput.o c-family/c-pragma.o c-family/c-pretty-print.o \ c-family/c-semantics.o c-family/c-ada-spec.o tree-mudflap.o \ - c-family/array-notation-common.o + c-family/array-notation-common.o c-family/c-ubsan.o # Language-independent object files. # We put the insn-*.o files first so that a parallel make will build @@ -2021,6 +2021,9 @@ c-family/array-notation-common.o : c-fam c-family/stub-objc.o : c-family/stub-objc.c $(CONFIG_H) $(SYSTEM_H) \ coretypes.h $(TREE_H) $(C_COMMON_H) c-family/c-objc.h +c-family/c-ubsan.o : c-family/c-ubsan.c $(CONFIG_H) $(SYSTEM_H) \ + coretypes.h $(TREE_H) $(C_COMMON_H) c-family/c-ubsan.h + default-c.o: config/default-c.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(C_TARGET_H) $(C_TARGET_DEF_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) \ --- gcc/doc/invoke.texi.mp 2013-06-07 23:01:16.781536172 +0200 +++ gcc/doc/invoke.texi 2013-06-07 23:04:47.639161403 +0200 @@ -5143,6 +5143,11 @@ Memory access instructions will be instr data race bugs. See @uref{http://code.google.com/p/data-race-test/wiki/ThreadSanitizer} for more details. +@item -fsanitize=undefined +Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector +Various computations will be instrumented to detect +undefined behavior, e.g.@: division by zero or various overflows. + @item -fdump-final-insns@r{[}=@var{file}@r{]} @opindex fdump-final-insns Dump the final internal representation (RTL) to @var{file}. If the --- gcc/cp/typeck.c.mp 2013-06-07 23:01:16.779536165 +0200 +++ gcc/cp/typeck.c 2013-06-07 23:04:47.625161351 +0200 @@ -37,6 +37,7 @@ along with GCC; see the file COPYING3. #include "convert.h" #include "c-family/c-common.h" #include "c-family/c-objc.h" +#include "c-family/c-ubsan.h" #include "params.h" static tree pfn_from_ptrmemfunc (tree); @@ -3891,6 +3892,12 @@ cp_build_binary_op (location_t location, op0 = orig_op0; op1 = orig_op1; + /* Remember whether we're doing / or %. */ + bool doing_div_or_mod = false; + + /* Remember whether we're doing << or >>. */ + bool doing_shift = false; + if (code == TRUTH_AND_EXPR || code == TRUTH_ANDIF_EXPR || code == TRUTH_OR_EXPR || code == TRUTH_ORIF_EXPR || code == TRUTH_XOR_EXPR) @@ -4070,8 +4077,15 @@ cp_build_binary_op (location_t location, { enum tree_code tcode0 = code0, tcode1 = code1; tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none); + cop1 = maybe_constant_value (cop1); - warn_for_div_by_zero (location, maybe_constant_value (cop1)); + if (!processing_template_decl && tcode0 == INTEGER_TYPE + && (TREE_CODE (cop1) != INTEGER_CST + || integer_zerop (cop1) + || integer_minus_onep (cop1))) + doing_div_or_mod = true; + + warn_for_div_by_zero (location, cop1); if (tcode0 == COMPLEX_TYPE || tcode0 == VECTOR_TYPE) tcode0 = TREE_CODE (TREE_TYPE (TREE_TYPE (op0))); @@ -4109,8 +4123,14 @@ cp_build_binary_op (location_t location, case FLOOR_MOD_EXPR: { tree cop1 = fold_non_dependent_expr_sfinae (op1, tf_none); + cop1 = maybe_constant_value (cop1); - warn_for_div_by_zero (location, maybe_constant_value (cop1)); + if (!processing_template_decl && code0 == INTEGER_TYPE + && (TREE_CODE (cop1) != INTEGER_CST + || integer_zerop (cop1) + || integer_minus_onep (cop1))) + doing_div_or_mod = true; + warn_for_div_by_zero (location, cop1); } if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE @@ -4164,6 +4184,7 @@ cp_build_binary_op (location_t location, if (TREE_CODE (const_op1) != INTEGER_CST) const_op1 = op1; result_type = type0; + doing_shift = true; if (TREE_CODE (const_op1) == INTEGER_CST) { if (tree_int_cst_lt (const_op1, integer_zero_node)) @@ -4211,6 +4232,7 @@ cp_build_binary_op (location_t location, if (TREE_CODE (const_op1) != INTEGER_CST) const_op1 = op1; result_type = type0; + doing_shift = true; if (TREE_CODE (const_op1) == INTEGER_CST) { if (tree_int_cst_lt (const_op1, integer_zero_node)) @@ -4607,6 +4629,17 @@ cp_build_binary_op (location_t location, break; } + if (flag_ubsan && doing_div_or_mod && !processing_template_decl) + { + resultcode = COMPOUND_EXPR; + return ubsan_instrument_division (location, code, op0, op1); + } + else if (flag_ubsan && doing_shift && !processing_template_decl) + { + resultcode = COMPOUND_EXPR; + return ubsan_instrument_shift (location, code, op0, op1); + } + if (((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == COMPLEX_TYPE || code0 == ENUMERAL_TYPE) && (code1 == INTEGER_TYPE || code1 == REAL_TYPE --- gcc/common.opt.mp 2013-06-07 23:01:16.778536161 +0200 +++ gcc/common.opt 2013-06-07 23:04:47.621161337 +0200 @@ -858,6 +858,10 @@ fsanitize=thread Common Report Var(flag_tsan) Enable ThreadSanitizer, a data race detector +fsanitize=undefined +Common Report Var(flag_ubsan) +Enable UndefinedBehaviorSanitizer, an undefined behavior detector + fasynchronous-unwind-tables Common Report Var(flag_asynchronous_unwind_tables) Optimization Generate unwind tables that are exact at each instruction boundary --- gcc/builtin-attrs.def.mp 2013-06-07 23:01:16.774536147 +0200 +++ gcc/builtin-attrs.def 2013-06-07 23:04:47.609161293 +0200 @@ -83,6 +83,7 @@ DEF_LIST_INT_INT (5,6) #undef DEF_LIST_INT_INT /* Construct trees for identifiers. */ +DEF_ATTR_IDENT (ATTR_COLD, "cold") DEF_ATTR_IDENT (ATTR_CONST, "const") DEF_ATTR_IDENT (ATTR_FORMAT, "format") DEF_ATTR_IDENT (ATTR_FORMAT_ARG, "format_arg") @@ -130,6 +131,8 @@ DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHRO ATTR_NULL, ATTR_NOTHROW_LIST) DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LEAF_LIST, ATTR_NORETURN,\ ATTR_NULL, ATTR_NOTHROW_LEAF_LIST) +DEF_ATTR_TREE_LIST (ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST, ATTR_COLD,\ + ATTR_NULL, ATTR_NORETURN_NOTHROW_LEAF_LIST) DEF_ATTR_TREE_LIST (ATTR_CONST_NORETURN_NOTHROW_LEAF_LIST, ATTR_CONST,\ ATTR_NULL, ATTR_NORETURN_NOTHROW_LEAF_LIST) DEF_ATTR_TREE_LIST (ATTR_MALLOC_NOTHROW_LIST, ATTR_MALLOC, \ --- gcc/c/c-typeck.c.mp 2013-06-07 23:01:16.776536153 +0200 +++ gcc/c/c-typeck.c 2013-06-07 23:04:47.617161321 +0200 @@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. #include "gimple.h" #include "c-family/c-objc.h" #include "c-family/c-common.h" +#include "c-family/c-ubsan.h" /* Possible cases of implicit bad conversions. Used to select diagnostic messages in convert_for_assignment. */ @@ -9527,6 +9528,12 @@ build_binary_op (location_t location, en operands to truth-values. */ bool boolean_op = false; + /* Remember whether we're doing / or %. */ + bool doing_div_or_mod = false; + + /* Remember whether we're doing << or >>. */ + bool doing_shift = false; + if (location == UNKNOWN_LOCATION) location = input_location; @@ -9728,6 +9735,7 @@ build_binary_op (location_t location, en case FLOOR_DIV_EXPR: case ROUND_DIV_EXPR: case EXACT_DIV_EXPR: + doing_div_or_mod = true; warn_for_div_by_zero (location, op1); if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE @@ -9775,6 +9783,7 @@ build_binary_op (location_t location, en case TRUNC_MOD_EXPR: case FLOOR_MOD_EXPR: + doing_div_or_mod = true; warn_for_div_by_zero (location, op1); if (code0 == VECTOR_TYPE && code1 == VECTOR_TYPE @@ -9873,6 +9882,7 @@ build_binary_op (location_t location, en else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE) && code1 == INTEGER_TYPE) { + doing_shift = true; if (TREE_CODE (op1) == INTEGER_CST) { if (tree_int_cst_sgn (op1) < 0) @@ -9925,6 +9935,7 @@ build_binary_op (location_t location, en else if ((code0 == INTEGER_TYPE || code0 == FIXED_POINT_TYPE) && code1 == INTEGER_TYPE) { + doing_shift = true; if (TREE_CODE (op1) == INTEGER_CST) { if (tree_int_cst_sgn (op1) < 0) @@ -10209,6 +10220,19 @@ build_binary_op (location_t location, en return error_mark_node; } + if (flag_ubsan && doing_div_or_mod) + { + ret = ubsan_instrument_division (location, code, op0, op1); + resultcode = COMPOUND_EXPR; + goto return_build_binary_op; + } + else if (flag_ubsan && doing_shift) + { + ret = ubsan_instrument_shift (location, code, op0, op1); + resultcode = COMPOUND_EXPR; + goto return_build_binary_op; + } + if ((code0 == INTEGER_TYPE || code0 == REAL_TYPE || code0 == COMPLEX_TYPE || code0 == FIXED_POINT_TYPE || code0 == VECTOR_TYPE) && --- gcc/asan.c.mp 2013-06-07 23:01:16.773536143 +0200 +++ gcc/asan.c 2013-06-07 23:04:47.603161271 +0200 @@ -2034,6 +2034,9 @@ initialize_sanitizer_builtins (void) tree BT_FN_VOID = build_function_type_list (void_type_node, NULL_TREE); tree BT_FN_VOID_PTR = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + tree BT_FN_VOID_PTR_PTR_PTR + = build_function_type_list (void_type_node, ptr_type_node, + ptr_type_node, ptr_type_node, NULL_TREE); tree BT_FN_VOID_PTR_PTRMODE = build_function_type_list (void_type_node, ptr_type_node, build_nonstandard_integer_type (POINTER_SIZE, @@ -2099,6 +2102,9 @@ initialize_sanitizer_builtins (void) #undef ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST #define ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST \ ECF_TM_PURE | ATTR_NORETURN_NOTHROW_LEAF_LIST +#undef ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST +#define ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST \ + /* ECF_COLD missing */ ATTR_NORETURN_NOTHROW_LEAF_LIST #undef DEF_SANITIZER_BUILTIN #define DEF_SANITIZER_BUILTIN(ENUM, NAME, TYPE, ATTRS) \ decl = add_builtin_function ("__builtin_" NAME, TYPE, ENUM, \ Marek