Index: tree-asan.c
===================================================================
--- tree-asan.c (revision 0)
+++ tree-asan.c (revision 0)
@@ -0,0 +1,512 @@
+/* AddressSanitizer, a fast memory error detector.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Kostya Serebryany <k...@google.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "tm_p.h"
+#include "basic-block.h"
+#include "flags.h"
+#include "function.h"
+#include "tree-inline.h"
+#include "gimple.h"
+#include "tree-iterator.h"
+#include "tree-flow.h"
+#include "tree-dump.h"
+#include "tree-pass.h"
+#include "diagnostic.h"
+#include "demangle.h"
+#include "langhooks.h"
+#include "ggc.h"
+#include "cgraph.h"
+#include "gimple.h"
+#include "tree-asan.h"
+
+/*
+ AddressSanitizer finds out-of-bounds and use-after-free bugs 
+ with <2x slowdown on average.
+
+ The tool consists of two parts:
+ instrumentation module (this file) and a run-time library.
+ The instrumentation module adds a run-time check before every memory insn.
+   For a 8 byte load accessing address X:
+     ShadowAddr = (X >> 3) + Offset
+     ShadowValue = (char*)ShadowAddr;
+     if (ShadowValue)
+       __asan_report_load8(X);
+   For a load of N bytes (N=1, 2 or 4) from address X:
+     ShadowAddr = (X >> 3) + Offset
+     ShadowValue = (char*)ShadowAddr;
+     if (ShadowValue)
+       if ((X & 7) + N - 1 > ShadowValue)
+         __asan_report_loadN(X);
+ Stores are instrumented similarly, but using __asan_report_storeN functions.
+ A call too __asan_init() is inserted to the list of module CTORs.
+
+ The run-time library redefines malloc (so that redzone are inserted around
+ the allocated memory) and free (so that reuse of free-ed memory is delayed), 
+ provides __asan_report* and __asan_init functions.
+ 
+ Read more:
+ http://code.google.com/p/address-sanitizer/wiki/AddressSanitizerAlgorithm
+
+ Implementation details:
+ This is my first code in gcc. I wrote it by copying tree-mudflap.c,
+ stripping 70% of irrelevant code and modifying the instrumentation routine
+ build_check_stmt. The code seems to work, but I don't feel I understand it.
+ In particular, transform_derefs() and transform_statements() seem too complex.
+ Suggestions are welcome on how to simplify them.
+ (All I need is to traverse *all* memory accesses and instrument them).
+
+ Future work:
+ The current implementation supports only detection of out-of-bounds and
+ use-after-free bugs in heap.
+ In order to support out-of-bounds for stack and globals we will need
+ to create redzones for stack and global object and poison them.
+*/
+
+/* The shadow address is computed as (X>>asan_scale) + (1<<asan_offset_log).
+ We may want to add command line flags to change these values. */
+static int asan_scale = 3;
+static int asan_offset_log_32 = 29;
+static int asan_offset_log_64 = 44;
+static int asan_offset_log;
+
+
+/* Construct a function tree for __asan_report_{load,store}{1,2,4,8}. */
+static tree
+report_error_func (int is_store, int size)
+{
+  tree fn_type;
+  tree def;
+
+  char name[100];
+  sprintf(name, "__asan_report_%s%d\n", is_store ? "store" : "load", size);
+  fn_type = build_function_type_list (void_type_node, ptr_type_node, 
NULL_TREE);
+  def = build_fn_decl (name, fn_type);
+  TREE_NOTHROW (def) = 1;
+  DECL_ATTRIBUTES (def) = tree_cons (get_identifier ("leaf"), NULL, 
DECL_ATTRIBUTES (def));
+  DECL_ASSEMBLER_NAME (def);
+  return def;
+}
+
+/* Construct a function tree for __asan_init(). */
+static tree
+asan_init_func(void)
+{
+  tree fn_type;
+  tree def;
+
+  fn_type = build_function_type_list (void_type_node, NULL_TREE);
+  def = build_fn_decl ("__asan_init", fn_type);
+  TREE_NOTHROW (def) = 1;
+  DECL_ASSEMBLER_NAME (def);
+  return def;
+}
+
+
+/* perform the instrumentation */
+static void
+build_check_stmt (tree base,
+                  gimple_stmt_iterator *instr_gsi,
+                  location_t location, int is_store, int size)
+{
+  gimple_stmt_iterator gsi;
+  basic_block cond_bb, then_bb, join_bb;
+  edge e;
+  tree cond, t, u;
+  tree base_addr;
+  tree shadow_value;
+  gimple g;
+  gimple_seq seq, stmts;
+  tree char_ptr_type = build_pointer_type(char_type_node);
+  tree shadow_type = char_type_node;
+  tree uintptr_type = lang_hooks.types.type_for_mode (ptr_mode,
+                                                      /*unsignedp=*/true);
+
+  /* We first need to split the current basic block, and start altering
+     the CFG.  This allows us to insert the statements we're about to
+     construct into the right basic blocks.  */
+
+  cond_bb = gimple_bb (gsi_stmt (*instr_gsi));
+  gsi = *instr_gsi;
+  gsi_prev (&gsi);
+  if (! gsi_end_p (gsi))
+    e = split_block (cond_bb, gsi_stmt (gsi));
+  else
+    e = split_block_after_labels (cond_bb);
+  cond_bb = e->src;
+  join_bb = e->dest;
+
+  /* A recap at this point: join_bb is the basic block at whose head
+     is the gimple statement for which this check expression is being
+     built.  cond_bb is the (possibly new, synthetic) basic block the
+     end of which will contain the cache-lookup code, and a
+     conditional that jumps to the cache-miss code or, much more
+     likely, over to join_bb.  */
+
+  /* Create the bb that contains the crash block.  */
+  then_bb = create_empty_bb (cond_bb);
+  make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
+  make_single_succ_edge (then_bb, join_bb, EDGE_FALLTHRU);
+
+  /* Mark the pseudo-fallthrough edge from cond_bb to join_bb.  */
+  e = find_edge (cond_bb, join_bb);
+  e->flags = EDGE_FALSE_VALUE;
+  e->count = cond_bb->count;
+  e->probability = REG_BR_PROB_BASE;
+
+  /* Update dominance info.  Note that bb_join's data was
+     updated by split_block.  */
+  if (dom_info_available_p (CDI_DOMINATORS))
+    {
+      set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+      set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
+    }
+
+  base_addr = make_rename_temp (uintptr_type, "base_addr");
+
+  seq = gimple_seq_alloc ();
+  t = fold_convert_loc (location, uintptr_type,
+                        unshare_expr (base));
+  t = force_gimple_operand (t, &stmts, false, NULL_TREE);
+  gimple_seq_add_seq (&seq, stmts);
+  g = gimple_build_assign (base_addr, t);
+  gimple_set_location (g, location);
+  gimple_seq_add_stmt (&seq, g);
+
+  t = build2 (RSHIFT_EXPR, uintptr_type, base_addr,
+              build_int_cst(uintptr_type, asan_scale)
+             );
+  t = build2 (PLUS_EXPR, uintptr_type, t,
+              build2 (LSHIFT_EXPR, uintptr_type,
+                      build_int_cst(uintptr_type, 1),
+                      build_int_cst(uintptr_type, asan_offset_log)
+                     )
+             );
+  t = build1 (INDIRECT_REF, shadow_type,
+              build1 (VIEW_CONVERT_EXPR, char_ptr_type, t));
+  t = force_gimple_operand (t, &stmts, false, NULL_TREE);
+  gimple_seq_add_seq (&seq, stmts);
+  shadow_value = make_rename_temp (shadow_type, "__asan_shadow");
+  g = gimple_build_assign (shadow_value, t);
+  gimple_set_location (g, location);
+  gimple_seq_add_stmt (&seq, g);
+
+  t = build2 (NE_EXPR, boolean_type_node, shadow_value,
+              build_int_cst(char_type_node, 0));
+
+  if (size < 8)
+    {
+      u = build2 (BIT_AND_EXPR, uintptr_type,
+                  base_addr,
+                  build_int_cst (uintptr_type, 7));
+      u = build1 (CONVERT_EXPR, char_type_node, u);
+      u = build2 (PLUS_EXPR, char_type_node, u,
+                  build_int_cst (char_type_node, size - 1));
+      u = build2 (GE_EXPR, uintptr_type, u, shadow_value);
+    }
+  else
+    {
+      u = build_int_cst (boolean_type_node, 1);
+    }
+
+  t = build2 (TRUTH_AND_EXPR, boolean_type_node, t, u);
+  t = force_gimple_operand (t, &stmts, false, NULL_TREE);
+  gimple_seq_add_seq (&seq, stmts);
+  cond = make_rename_temp (boolean_type_node, "asan_crash_cond");
+  g = gimple_build_assign  (cond, t);
+  gimple_set_location (g, location);
+  gimple_seq_add_stmt (&seq, g);
+
+  g = gimple_build_cond (NE_EXPR, cond, boolean_false_node, NULL_TREE,
+                         NULL_TREE);
+  gimple_set_location (g, location);
+  gimple_seq_add_stmt (&seq, g);
+
+  /* crash */
+  gsi = gsi_last_bb (cond_bb);
+  gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
+  seq = gimple_seq_alloc ();
+  g = gimple_build_call (report_error_func(is_store, size), 1, base_addr);
+  gimple_seq_add_stmt (&seq, g);
+
+
+  /* Insert the check code in the THEN block.  */
+  gsi = gsi_start_bb (then_bb);
+  gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
+
+  *instr_gsi = gsi_start_bb (join_bb);
+}
+
+/* asan: do we need this */
+/* Check whether the given decl, generally a VAR_DECL or PARM_DECL, is
+   eligible for instrumentation.  For the mudflap1 pass, this implies
+   that it should be registered with the libmudflap runtime.  For the
+   mudflap2 pass this means instrumenting an indirection operation with
+   respect to the object.
+*/
+static int
+decl_eligible_p (tree decl)
+{
+  return ((TREE_CODE (decl) == VAR_DECL || TREE_CODE (decl) == PARM_DECL)
+          /* The decl must have its address taken.  In the case of
+             arrays, this flag is also set if the indexes are not
+             compile-time known valid constants.  */
+          /* XXX: not sufficient: return-by-value structs! */
+          && TREE_ADDRESSABLE (decl)
+          /* The type of the variable must be complete.  */
+          && COMPLETE_OR_VOID_TYPE_P (TREE_TYPE (decl))
+          /* The decl hasn't been decomposed somehow.  */
+          && !DECL_HAS_VALUE_EXPR_P (decl));
+}
+
+/* asan: this looks too complex. Can this be done simpler? */
+static void
+transform_derefs (gimple_stmt_iterator *iter, tree *tp,
+                   location_t location, int is_store)
+{
+  tree type, base, addr, t;
+  int size;
+
+  t = *tp;
+  type = TREE_TYPE (t);
+
+  if (type == error_mark_node)
+    return;
+
+  size = tree_to_double_int (TYPE_SIZE (type)).low / 8;
+
+  switch (TREE_CODE (t))
+    {
+    case ARRAY_REF:
+    case COMPONENT_REF:
+      {
+        /* This is trickier than it may first appear.  The reason is
+           that we are looking at expressions from the "inside out" at
+           this point.  We may have a complex nested aggregate/array
+           expression (e.g. "a.b[i].c"), maybe with an indirection as
+           the leftmost operator ("p->a.b.d"), where instrumentation
+           is necessary.  Or we may have an innocent "a.b.c"
+           expression that must not be instrumented.  We need to
+           recurse all the way down the nesting structure to figure it
+           out: looking just at the outer node is not enough.  */
+        tree var;
+        int component_ref_only = (TREE_CODE (t) == COMPONENT_REF);
+        int bitfield_ref_p = (TREE_CODE (t) == COMPONENT_REF
+                              && DECL_BIT_FIELD_TYPE (TREE_OPERAND (t, 1)));
+
+        if (bitfield_ref_p)
+          {
+            return;
+          }
+        /* Iterate to the top of the ARRAY_REF/COMPONENT_REF
+           containment hierarchy to find the outermost VAR_DECL.  */
+        var = TREE_OPERAND (t, 0);
+        while (1)
+          {
+            if (TREE_CODE (var) == ARRAY_REF)
+              {
+                component_ref_only = 0;
+                var = TREE_OPERAND (var, 0);
+              }
+            else if (TREE_CODE (var) == COMPONENT_REF)
+              var = TREE_OPERAND (var, 0);
+            else if (INDIRECT_REF_P (var)
+                     || TREE_CODE (var) == MEM_REF)
+              {
+                base = TREE_OPERAND (var, 0);
+                break;
+              }
+            else if (TREE_CODE (var) == VIEW_CONVERT_EXPR)
+              {
+                var = TREE_OPERAND (var, 0);
+                if (CONSTANT_CLASS_P (var)
+                    && TREE_CODE (var) != STRING_CST)
+                  return;
+              }
+            else
+              {
+                gcc_assert (TREE_CODE (var) == VAR_DECL
+                            || TREE_CODE (var) == PARM_DECL
+                            || TREE_CODE (var) == RESULT_DECL
+                            || TREE_CODE (var) == STRING_CST);
+                /* Don't instrument this access if the underlying
+                   variable is not "eligible".  This test matches
+                   those arrays that have only known-valid indexes,
+                   and thus are not labeled TREE_ADDRESSABLE.  */
+                if (! decl_eligible_p (var) || component_ref_only)
+                  return;
+                else
+                  {
+                    base = build1 (ADDR_EXPR,
+                                   build_pointer_type (TREE_TYPE (var)), var);
+                    break;
+                  }
+              }
+          }
+
+        addr = build1 (ADDR_EXPR, build_pointer_type (type), t);
+
+      }
+      break;
+
+    case INDIRECT_REF:
+      addr = TREE_OPERAND (t, 0);
+      base = addr;
+      break;
+
+    case MEM_REF:
+      addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (TREE_OPERAND (t, 1)),
+                     TREE_OPERAND (t, 0),
+                     fold_convert (sizetype, TREE_OPERAND (t, 1)));
+      base = addr;
+      break;
+
+    case TARGET_MEM_REF:
+      addr = tree_mem_ref_addr (ptr_type_node, t);
+      base = addr;
+      break;
+
+    default:
+      return;
+    }
+
+  if (size != 1 && size != 2 && size != 4 && size != 8)
+    return;
+  build_check_stmt (base, iter, location, is_store, size);
+}
+
+/* asan: this looks too complex. Can this be done simpler? */
+/* Transform
+   1) Memory references.
+   2) BUILTIN_ALLOCA calls.
+*/
+static void
+transform_statements (void)
+{
+  basic_block bb, next;
+  gimple_stmt_iterator i;
+  int saved_last_basic_block = last_basic_block;
+  enum gimple_rhs_class grhs_class;
+
+  bb = ENTRY_BLOCK_PTR ->next_bb;
+  do
+    {
+      next = bb->next_bb;
+      for (i = gsi_start_bb (bb); !gsi_end_p (i); gsi_next (&i))
+        {
+          gimple s = gsi_stmt (i);
+
+          /* Only a few GIMPLE statements can reference memory.  */
+          switch (gimple_code (s))
+            {
+            case GIMPLE_ASSIGN:
+              transform_derefs (&i, gimple_assign_lhs_ptr (s),
+                                   gimple_location (s), 1);
+              transform_derefs (&i, gimple_assign_rhs1_ptr (s),
+                                   gimple_location (s), 0);
+              grhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (s));
+              if (grhs_class == GIMPLE_BINARY_RHS)
+                transform_derefs (&i, gimple_assign_rhs2_ptr (s),
+                                   gimple_location (s), 0);
+              break;
+
+            case GIMPLE_RETURN:
+              if (gimple_return_retval (s) != NULL_TREE)
+                {
+                  transform_derefs (&i, gimple_return_retval_ptr (s),
+                                     gimple_location (s),
+                                     0);
+                }
+              break;
+
+            case GIMPLE_CALL:
+              {
+                tree fndecl = gimple_call_fndecl (s);
+                if (fndecl && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_ALLOCA))
+                  gimple_call_set_cannot_inline (s, true);
+              }
+              break;
+
+            default:
+              ;
+            }
+        }
+      bb = next;
+    }
+  while (bb && bb->index <= saved_last_basic_block);
+}
+
+/* Module-level instrumentation.
+   - Insert __asan_init() into the list of CTORs.
+   - TODO: insert redzones around globals.
+ */
+void
+asan_finish_file (void)
+{
+  tree ctor_statements = NULL_TREE;
+  append_to_statement_list (build_call_expr (asan_init_func(), 0),
+                            &ctor_statements);
+  cgraph_build_static_cdtor ('I', ctor_statements,
+                             MAX_RESERVED_INIT_PRIORITY-1);
+}
+
+/* Instrument the current function. */
+static unsigned int
+asan_instrument (void)
+{
+  struct gimplify_ctx gctx;
+  int is_64 = 1;  /*TODO*/
+  asan_offset_log = is_64 ? asan_offset_log_64 : asan_offset_log_32;
+  push_gimplify_context (&gctx);
+  transform_statements ();
+  pop_gimplify_context (NULL);
+  return 0;
+}
+
+static bool
+gate_asan (void)
+{
+  return flag_asan != 0;
+}
+
+struct gimple_opt_pass pass_asan =
+{
+ {
+  GIMPLE_PASS,
+  "asan",                               /* name */
+  gate_asan,                            /* gate */
+  asan_instrument,                      /* execute */
+  NULL,                                 /* sub */
+  NULL,                                 /* next */
+  0,                                    /* static_pass_number */
+  TV_NONE,                              /* tv_id */
+  PROP_ssa | PROP_cfg | PROP_gimple_leh,/* properties_required */
+  0,                                    /* properties_provided */
+  0,                                    /* properties_destroyed */
+  0,                                    /* todo_flags_start */
+  TODO_verify_flow | TODO_verify_stmts
+  | TODO_dump_func | TODO_update_ssa    /* todo_flags_finish */
+ }
+};
Index: tree-asan.h
===================================================================
--- tree-asan.h (revision 0)
+++ tree-asan.h (revision 0)
@@ -0,0 +1,26 @@
+/* AddressSanitizer, a fast memory error detector.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Kostya Serebryany <k...@google.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#ifndef TREE_ASAN
+#define TREE_ASAN
+
+extern void asan_finish_file(void);
+
+#endif /* TREE_ASAN */
Index: tree-pass.h
===================================================================
--- tree-pass.h (revision 179959)
+++ tree-pass.h (working copy)
@@ -352,6 +352,7 @@
 
 extern struct gimple_opt_pass pass_mudflap_1;
 extern struct gimple_opt_pass pass_mudflap_2;
+extern struct gimple_opt_pass pass_asan;
 extern struct gimple_opt_pass pass_lower_cf;
 extern struct gimple_opt_pass pass_refactor_eh;
 extern struct gimple_opt_pass pass_lower_eh;
Index: toplev.c
===================================================================
--- toplev.c    (revision 179959)
+++ toplev.c    (working copy)
@@ -74,6 +74,7 @@
 #include "value-prof.h"
 #include "alloc-pool.h"
 #include "tree-mudflap.h"
+#include "tree-asan.h"
 #include "tree-pass.h"
 #include "gimple.h"
 #include "tree-ssa-alias.h"
@@ -615,6 +616,10 @@
       if (flag_mudflap)
        mudflap_finish_file ();
 
+      /* File-scope initialization for AddressSanitizer. */
+      if (flag_asan)
+        asan_finish_file();
+
       output_shared_constant_pool ();
       output_object_blocks ();
 
Index: common.opt
===================================================================
--- common.opt  (revision 179959)
+++ common.opt  (working copy)
@@ -887,6 +887,10 @@
 Common Ignore
 Does nothing. Preserved for backward compatibility.
 
+fasan
+Common RejectNegative Report Var(flag_asan)
+Enable AddressSanitizer, a memory error detector
+
 fasynchronous-unwind-tables
 Common Report Var(flag_asynchronous_unwind_tables) Optimization
 Generate unwind tables that are exact at each instruction boundary
Index: Makefile.in
===================================================================
--- Makefile.in (revision 179959)
+++ Makefile.in (working copy)
@@ -1418,6 +1418,7 @@
        toplev.o \
        tracer.o \
        tree-affine.o \
+       tree-asan.o \
        tree-call-cdce.o \
        tree-cfg.o \
        tree-cfgcleanup.o \
@@ -2393,6 +2394,10 @@
    $(TREE_H) $(PARAMS_H) $(FLAGS_H) $(FUNCTION_H) $(EXPR_H) output.h $(RTL_H) \
    $(GGC_H) $(TM_P_H) $(TARGET_H) langhooks.h $(REGS_H) gt-stor-layout.h \
    $(DIAGNOSTIC_CORE_H) $(CGRAPH_H) $(TREE_INLINE_H) $(TREE_DUMP_H) $(GIMPLE_H)
+tree-asan.o : tree-asan.c tree-asan.h $(CONFIG_H) pointer-set.h \
+   $(SYSTEM_H) $(TREE_H) $(GIMPLE_H) \
+   output.h $(DIAGNOSTIC_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H) \
+   tree-pretty-print.h
 tree-ssa-tail-merge.o: tree-ssa-tail-merge.c \
    $(SYSTEM_H) $(CONFIG_H) coretypes.h $(TM_H) $(BITMAP_H) \
    $(FLAGS_H) $(TM_P_H) $(BASIC_BLOCK_H) output.h \
Index: passes.c
===================================================================
--- passes.c    (revision 179959)
+++ passes.c    (working copy)
@@ -1420,6 +1420,7 @@
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
   NEXT_PASS (pass_mudflap_2);
+  NEXT_PASS (pass_asan);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
 

--
This patch is available for review at http://codereview.appspot.com/5272048

Reply via email to