Index: tree-asan.c =================================================================== --- tree-asan.c (revision 0) +++ tree-asan.c (revision 0) @@ -0,0 +1,512 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany <k...@google.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "tree.h" +#include "tm_p.h" +#include "basic-block.h" +#include "flags.h" +#include "function.h" +#include "tree-inline.h" +#include "gimple.h" +#include "tree-iterator.h" +#include "tree-flow.h" +#include "tree-dump.h" +#include "tree-pass.h" +#include "diagnostic.h" +#include "demangle.h" +#include "langhooks.h" +#include "ggc.h" +#include "cgraph.h" +#include "gimple.h" +#include "tree-asan.h" + +/* + AddressSanitizer finds out-of-bounds and use-after-free bugs + with <2x slowdown on average. + + The tool consists of two parts: + instrumentation module (this file) and a run-time library. + The instrumentation module adds a run-time check before every memory insn. + For a 8 byte load accessing address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = (char*)ShadowAddr; + if (ShadowValue) + __asan_report_load8(X); + For a load of N bytes (N=1, 2 or 4) from address X: + ShadowAddr = (X >> 3) + Offset + ShadowValue = (char*)ShadowAddr; + if (ShadowValue) + if ((X & 7) + N - 1 > ShadowValue) + __asan_report_loadN(X); + Stores are instrumented similarly, but using __asan_report_storeN functions. + A call too __asan_init() is inserted to the list of module CTORs. + + The run-time library redefines malloc (so that redzone are inserted around + the allocated memory) and free (so that reuse of free-ed memory is delayed), + provides __asan_report* and __asan_init functions. + + Read more: + http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm + + Implementation details: + This is my first code in gcc. I wrote it by copying tree-mudflap.c, + stripping 70% of irrelevant code and modifying the instrumentation routine + build_check_stmt. The code seems to work, but I don't feel I understand it. + In particular, transform_derefs() and transform_statements() seem too complex. + Suggestions are welcome on how to simplify them. + (All I need is to traverse *all* memory accesses and instrument them). + + Future work: + The current implementation supports only detection of out-of-bounds and + use-after-free bugs in heap. + In order to support out-of-bounds for stack and globals we will need + to create redzones for stack and global object and poison them. +*/ + +/* The shadow address is computed as (X>>asan_scale) + (1<<asan_offset_log). + We may want to add command line flags to change these values. */ +static int asan_scale = 3; +static int asan_offset_log_32 = 29; +static int asan_offset_log_64 = 44; +static int asan_offset_log; + + +/* Construct a function tree for __asan_report_{load,store}{1,2,4,8}. */ +static tree +report_error_func (int is_store, int size) +{ + tree fn_type; + tree def; + + char name[100]; + sprintf(name, "__asan_report_%s%d\n", is_store ? "store" : "load", size); + fn_type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + def = build_fn_decl (name, fn_type); + TREE_NOTHROW (def) = 1; + DECL_ATTRIBUTES (def) = tree_cons (get_identifier ("leaf"), NULL, DECL_ATTRIBUTES (def)); + DECL_ASSEMBLER_NAME (def); + return def; +} + +/* Construct a function tree for __asan_init(). */ +static tree +asan_init_func(void) +{ + tree fn_type; + tree def; + + fn_type = build_function_type_list (void_type_node, NULL_TREE); + def = build_fn_decl ("__asan_init", fn_type); + TREE_NOTHROW (def) = 1; + DECL_ASSEMBLER_NAME (def); + return def; +} + + +/* perform the instrumentation */ +static void +build_check_stmt (tree base, + gimple_stmt_iterator *instr_gsi, + location_t location, int is_store, int size) +{ + gimple_stmt_iterator gsi; + basic_block cond_bb, then_bb, join_bb; + edge e; + tree cond, t, u; + tree base_addr; + tree shadow_value; + gimple g; + gimple_seq seq, stmts; + tree char_ptr_type = build_pointer_type(char_type_node); + tree shadow_type = char_type_node; + tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode, + /*unsignedp=*/true); + + /* We first need to split the current basic block, and start altering + the CFG. This allows us to insert the statements we're about to + construct into the right basic blocks. */ + + cond_bb = gimple_bb (gsi_stmt (*instr_gsi)); + gsi = *instr_gsi; + gsi_prev (&gsi); + if (! gsi_end_p (gsi)) + e = split_block (cond_bb, gsi_stmt (gsi)); + else + e = split_block_after_labels (cond_bb); + cond_bb = e->src; + join_bb = e->dest; + + /* A recap at this point: join_bb is the basic block at whose head + is the gimple statement for which this check expression is being + built. cond_bb is the (possibly new, synthetic) basic block the + end of which will contain the cache-lookup code, and a + conditional that jumps to the cache-miss code or, much more + likely, over to join_bb. */ + + /* Create the bb that contains the crash block. */ + then_bb = create_empty_bb (cond_bb); + make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE); + make_single_succ_edge (then_bb, join_bb, EDGE_FALLTHRU); + + /* Mark the pseudo-fallthrough edge from cond_bb to join_bb. */ + e = find_edge (cond_bb, join_bb); + e->flags = EDGE_FALSE_VALUE; + e->count = cond_bb->count; + e->probability = REG_BR_PROB_BASE; + + /* Update dominance info. Note that bb_join's data was + updated by split_block. */ + if (dom_info_available_p (CDI_DOMINATORS)) + { + set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb); + set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb); + } + + base_addr = make_rename_temp (uintptr_type, "base_addr"); + + seq = gimple_seq_alloc (); + t = fold_convert_loc (location, uintptr_type, + unshare_expr (base)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + g = gimple_build_assign (base_addr, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + t = build2 (RSHIFT_EXPR, uintptr_type, base_addr, + build_int_cst(uintptr_type, asan_scale) + ); + t = build2 (PLUS_EXPR, uintptr_type, t, + build2 (LSHIFT_EXPR, uintptr_type, + build_int_cst(uintptr_type, 1), + build_int_cst(uintptr_type, asan_offset_log) + ) + ); + t = build1 (INDIRECT_REF, shadow_type, + build1 (VIEW_CONVERT_EXPR, char_ptr_type, t)); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + shadow_value = make_rename_temp (shadow_type, "__asan_shadow"); + g = gimple_build_assign (shadow_value, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + t = build2 (NE_EXPR, boolean_type_node, shadow_value, + build_int_cst(char_type_node, 0)); + + if (size < 8) + { + u = build2 (BIT_AND_EXPR, uintptr_type, + base_addr, + build_int_cst (uintptr_type, 7)); + u = build1 (CONVERT_EXPR, char_type_node, u); + u = build2 (PLUS_EXPR, char_type_node, u, + build_int_cst (char_type_node, size - 1)); + u = build2 (GE_EXPR, uintptr_type, u, shadow_value); + } + else + { + u = build_int_cst (boolean_type_node, 1); + } + + t = build2 (TRUTH_AND_EXPR, boolean_type_node, t, u); + t = force_gimple_operand (t, &stmts, false, NULL_TREE); + gimple_seq_add_seq (&seq, stmts); + cond = make_rename_temp (boolean_type_node, "asan_crash_cond"); + g = gimple_build_assign (cond, t); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + g = gimple_build_cond (NE_EXPR, cond, boolean_false_node, NULL_TREE, + NULL_TREE); + gimple_set_location (g, location); + gimple_seq_add_stmt (&seq, g); + + /* crash */ + gsi = gsi_last_bb (cond_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + seq = gimple_seq_alloc (); + g = gimple_build_call (report_error_func(is_store, size), 1, base_addr); + gimple_seq_add_stmt (&seq, g); + + + /* Insert the check code in the THEN block. */ + gsi = gsi_start_bb (then_bb); + gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING); + + *instr_gsi = gsi_start_bb (join_bb); +} + +/* asan: do we need this */ +/* Check whether the given decl, generally a VAR_DECL or PARM_DECL, is + eligible for instrumentation. For the mudflap1 pass, this implies + that it should be registered with the libmudflap runtime. For the + mudflap2 pass this means instrumenting an indirection operation with + respect to the object. +*/ +static int +decl_eligible_p (tree decl) +{ + return ((TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == PARM_DECL) + /* The decl must have its address taken. In the case of + arrays, this flag is also set if the indexes are not + compile-time known valid constants. */ + /* XXX: not sufficient: return-by-value structs! */ + && TREE_ADDRESSABLE (decl) + /* The type of the variable must be complete. */ + && COMPLETE_OR_VOID_TYPE_P (TREE_TYPE (decl)) + /* The decl hasn't been decomposed somehow. */ + && !DECL_HAS_VALUE_EXPR_P (decl)); +} + +/* asan: this looks too complex. Can this be done simpler? */ +static void +transform_derefs (gimple_stmt_iterator *iter, tree *tp, + location_t location, int is_store) +{ + tree type, base, addr, t; + int size; + + t = *tp; + type = TREE_TYPE (t); + + if (type == error_mark_node) + return; + + size = tree_to_double_int (TYPE_SIZE (type)).low / 8; + + switch (TREE_CODE (t)) + { + case ARRAY_REF: + case COMPONENT_REF: + { + /* This is trickier than it may first appear. The reason is + that we are looking at expressions from the "inside out" at + this point. We may have a complex nested aggregate/array + expression (e.g. "a.b[i].c"), maybe with an indirection as + the leftmost operator ("p->a.b.d"), where instrumentation + is necessary. Or we may have an innocent "a.b.c" + expression that must not be instrumented. We need to + recurse all the way down the nesting structure to figure it + out: looking just at the outer node is not enough. */ + tree var; + int component_ref_only = (TREE_CODE (t) == COMPONENT_REF); + int bitfield_ref_p = (TREE_CODE (t) == COMPONENT_REF + && DECL_BIT_FIELD_TYPE (TREE_OPERAND (t, 1))); + + if (bitfield_ref_p) + { + return; + } + /* Iterate to the top of the ARRAY_REF/COMPONENT_REF + containment hierarchy to find the outermost VAR_DECL. */ + var = TREE_OPERAND (t, 0); + while (1) + { + if (TREE_CODE (var) == ARRAY_REF) + { + component_ref_only = 0; + var = TREE_OPERAND (var, 0); + } + else if (TREE_CODE (var) == COMPONENT_REF) + var = TREE_OPERAND (var, 0); + else if (INDIRECT_REF_P (var) + || TREE_CODE (var) == MEM_REF) + { + base = TREE_OPERAND (var, 0); + break; + } + else if (TREE_CODE (var) == VIEW_CONVERT_EXPR) + { + var = TREE_OPERAND (var, 0); + if (CONSTANT_CLASS_P (var) + && TREE_CODE (var) != STRING_CST) + return; + } + else + { + gcc_assert (TREE_CODE (var) == VAR_DECL + || TREE_CODE (var) == PARM_DECL + || TREE_CODE (var) == RESULT_DECL + || TREE_CODE (var) == STRING_CST); + /* Don't instrument this access if the underlying + variable is not "eligible". This test matches + those arrays that have only known-valid indexes, + and thus are not labeled TREE_ADDRESSABLE. */ + if (! decl_eligible_p (var) || component_ref_only) + return; + else + { + base = build1 (ADDR_EXPR, + build_pointer_type (TREE_TYPE (var)), var); + break; + } + } + } + + addr = build1 (ADDR_EXPR, build_pointer_type (type), t); + + } + break; + + case INDIRECT_REF: + addr = TREE_OPERAND (t, 0); + base = addr; + break; + + case MEM_REF: + addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (TREE_OPERAND (t, 1)), + TREE_OPERAND (t, 0), + fold_convert (sizetype, TREE_OPERAND (t, 1))); + base = addr; + break; + + case TARGET_MEM_REF: + addr = tree_mem_ref_addr (ptr_type_node, t); + base = addr; + break; + + default: + return; + } + + if (size != 1 && size != 2 && size != 4 && size != 8) + return; + build_check_stmt (base, iter, location, is_store, size); +} + +/* asan: this looks too complex. Can this be done simpler? */ +/* Transform + 1) Memory references. + 2) BUILTIN_ALLOCA calls. +*/ +static void +transform_statements (void) +{ + basic_block bb, next; + gimple_stmt_iterator i; + int saved_last_basic_block = last_basic_block; + enum gimple_rhs_class grhs_class; + + bb = ENTRY_BLOCK_PTR ->next_bb; + do + { + next = bb->next_bb; + for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i)) + { + gimple s = gsi_stmt (i); + + /* Only a few GIMPLE statements can reference memory. */ + switch (gimple_code (s)) + { + case GIMPLE_ASSIGN: + transform_derefs (&i, gimple_assign_lhs_ptr (s), + gimple_location (s), 1); + transform_derefs (&i, gimple_assign_rhs1_ptr (s), + gimple_location (s), 0); + grhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (s)); + if (grhs_class == GIMPLE_BINARY_RHS) + transform_derefs (&i, gimple_assign_rhs2_ptr (s), + gimple_location (s), 0); + break; + + case GIMPLE_RETURN: + if (gimple_return_retval (s) != NULL_TREE) + { + transform_derefs (&i, gimple_return_retval_ptr (s), + gimple_location (s), + 0); + } + break; + + case GIMPLE_CALL: + { + tree fndecl = gimple_call_fndecl (s); + if (fndecl && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA)) + gimple_call_set_cannot_inline (s, true); + } + break; + + default: + ; + } + } + bb = next; + } + while (bb && bb->index <= saved_last_basic_block); +} + +/* Module-level instrumentation. + - Insert __asan_init() into the list of CTORs. + - TODO: insert redzones around globals. + */ +void +asan_finish_file (void) +{ + tree ctor_statements = NULL_TREE; + append_to_statement_list (build_call_expr (asan_init_func(), 0), + &ctor_statements); + cgraph_build_static_cdtor ('I', ctor_statements, + MAX_RESERVED_INIT_PRIORITY-1); +} + +/* Instrument the current function. */ +static unsigned int +asan_instrument (void) +{ + struct gimplify_ctx gctx; + int is_64 = 1; /*TODO*/ + asan_offset_log = is_64 ? asan_offset_log_64 : asan_offset_log_32; + push_gimplify_context (&gctx); + transform_statements (); + pop_gimplify_context (NULL); + return 0; +} + +static bool +gate_asan (void) +{ + return flag_asan != 0; +} + +struct gimple_opt_pass pass_asan = +{ + { + GIMPLE_PASS, + "asan", /* name */ + gate_asan, /* gate */ + asan_instrument, /* execute */ + NULL, /* sub */ + NULL, /* next */ + 0, /* static_pass_number */ + TV_NONE, /* tv_id */ + PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_verify_flow | TODO_verify_stmts + | TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ + } +}; Index: tree-asan.h =================================================================== --- tree-asan.h (revision 0) +++ tree-asan.h (revision 0) @@ -0,0 +1,26 @@ +/* AddressSanitizer, a fast memory error detector. + Copyright (C) 2011 Free Software Foundation, Inc. + Contributed by Kostya Serebryany <k...@google.com> + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#ifndef TREE_ASAN +#define TREE_ASAN + +extern void asan_finish_file(void); + +#endif /* TREE_ASAN */ Index: tree-pass.h =================================================================== --- tree-pass.h (revision 179959) +++ tree-pass.h (working copy) @@ -352,6 +352,7 @@ extern struct gimple_opt_pass pass_mudflap_1; extern struct gimple_opt_pass pass_mudflap_2; +extern struct gimple_opt_pass pass_asan; extern struct gimple_opt_pass pass_lower_cf; extern struct gimple_opt_pass pass_refactor_eh; extern struct gimple_opt_pass pass_lower_eh; Index: toplev.c =================================================================== --- toplev.c (revision 179959) +++ toplev.c (working copy) @@ -74,6 +74,7 @@ #include "value-prof.h" #include "alloc-pool.h" #include "tree-mudflap.h" +#include "tree-asan.h" #include "tree-pass.h" #include "gimple.h" #include "tree-ssa-alias.h" @@ -615,6 +616,10 @@ if (flag_mudflap) mudflap_finish_file (); + /* File-scope initialization for AddressSanitizer. */ + if (flag_asan) + asan_finish_file(); + output_shared_constant_pool (); output_object_blocks (); Index: common.opt =================================================================== --- common.opt (revision 179959) +++ common.opt (working copy) @@ -887,6 +887,10 @@ Common Ignore Does nothing. Preserved for backward compatibility. +fasan +Common RejectNegative Report Var(flag_asan) +Enable AddressSanitizer, a memory error detector + fasynchronous-unwind-tables Common Report Var(flag_asynchronous_unwind_tables) Optimization Generate unwind tables that are exact at each instruction boundary Index: Makefile.in =================================================================== --- Makefile.in (revision 179959) +++ Makefile.in (working copy) @@ -1418,6 +1418,7 @@ toplev.o \ tracer.o \ tree-affine.o \ + tree-asan.o \ tree-call-cdce.o \ tree-cfg.o \ tree-cfgcleanup.o \ @@ -2393,6 +2394,10 @@ $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \ $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \ $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H) +tree-asan.o : tree-asan.c tree-asan.h $(CONFIG_H) pointer-set.h \ + $(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \ + output.h $(DIAGNOSTIC_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \ + tree-pretty-print.h tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \ $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \ $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \ Index: passes.c =================================================================== --- passes.c (revision 179959) +++ passes.c (working copy) @@ -1420,6 +1420,7 @@ NEXT_PASS (pass_lower_resx); NEXT_PASS (pass_nrv); NEXT_PASS (pass_mudflap_2); + NEXT_PASS (pass_asan); NEXT_PASS (pass_cleanup_cfg_post_optimizing); NEXT_PASS (pass_warn_function_noreturn);
-- This patch is available for review at http://codereview.appspot.com/5272048