Another shot at optimizing redundant UBSAN_NULL statements.

This time we walk the dominator tree - that should result in
more effective optimization - and keep a list of UBSAN_NULL
statements that dominate the current block, see the comment
before sanopt_optimize_walker.  Statements coming from blocks
that are left during the CFG walk are lazily removed, but I
think that isn't really necessary: see the ??? comment.

E.g. on http://ur1.ca/iohtf this allowed us to drop 8 stmts.

I moved all of this into new sanopt.c file.
(I guess that file includes some headers that we in fact don't
need, but the header flattening doesn't make it easy to check,
there are too many of them.)

Bootstrapped(-ubsan)/regtested on x86_64-linux, ok for trunk?

2014-11-03  Marek Polacek  <pola...@redhat.com>

        * Makefile.in (OBJS): Add sanopt.o.
        (GTFILES): Add sanopt.c.
        * asan.h (asan_expand_check_ifn): Declare.
        * asan.c (asan_expand_check_ifn): No longer static.
        (class pass_sanopt, pass_sanopt::execute, make_pass_sanopt): Move...
        * sanopt.c: ...here.  New file.
testsuite/
        * c-c++-common/ubsan/align-2.c: Remove dg-output.
        * c-c++-common/ubsan/align-4.c: Likewise.
        * g++.dg/ubsan/null-1.C: Likewise.
        * g++.dg/ubsan/null-2.C: Likewise.

diff --git gcc/Makefile.in gcc/Makefile.in
index 9c67fe2..f383032 100644
--- gcc/Makefile.in
+++ gcc/Makefile.in
@@ -1376,6 +1376,7 @@ OBJS = \
        asan.o \
        tsan.o \
        ubsan.o \
+       sanopt.o \
        tree-call-cdce.o \
        tree-cfg.o \
        tree-cfgcleanup.o \
@@ -2305,6 +2306,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \
   $(srcdir)/asan.c \
   $(srcdir)/ubsan.c \
   $(srcdir)/tsan.c \
+  $(srcdir)/sanopt.c \
   $(srcdir)/ipa-devirt.c \
   $(srcdir)/internal-fn.h \
   @all_gtfiles@
diff --git gcc/asan.c gcc/asan.c
index 8f146d2..79dede7 100644
--- gcc/asan.c
+++ gcc/asan.c
@@ -2497,7 +2497,7 @@ asan_finish_file (void)
 
 /* Expand the ASAN_{LOAD,STORE} builtins.  */
 
-static bool
+bool
 asan_expand_check_ifn (gimple_stmt_iterator *iter, bool use_calls)
 {
   gimple g = gsi_stmt (*iter);
@@ -2800,114 +2800,4 @@ make_pass_asan_O0 (gcc::context *ctxt)
   return new pass_asan_O0 (ctxt);
 }
 
-/* Perform optimization of sanitize functions.  */
-
-namespace {
-
-const pass_data pass_data_sanopt =
-{
-  GIMPLE_PASS, /* type */
-  "sanopt", /* name */
-  OPTGROUP_NONE, /* optinfo_flags */
-  TV_NONE, /* tv_id */
-  ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */
-  0, /* properties_provided */
-  0, /* properties_destroyed */
-  0, /* todo_flags_start */
-  TODO_update_ssa, /* todo_flags_finish */
-};
-
-class pass_sanopt : public gimple_opt_pass
-{
-public:
-  pass_sanopt (gcc::context *ctxt)
-    : gimple_opt_pass (pass_data_sanopt, ctxt)
-  {}
-
-  /* opt_pass methods: */
-  virtual bool gate (function *) { return flag_sanitize; }
-  virtual unsigned int execute (function *);
-
-}; // class pass_sanopt
-
-unsigned int
-pass_sanopt::execute (function *fun)
-{
-  basic_block bb;
-
-  int asan_num_accesses = 0;
-  if (flag_sanitize & SANITIZE_ADDRESS)
-    {
-      gimple_stmt_iterator gsi;
-      FOR_EACH_BB_FN (bb, fun)
-       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-         {
-           gimple stmt = gsi_stmt (gsi);
-           if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)
-               && gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK)
-             ++asan_num_accesses;
-         }
-    }
-
-  bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX
-    && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD;
-
-  FOR_EACH_BB_FN (bb, fun)
-    {
-      gimple_stmt_iterator gsi;
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
-       {
-         gimple stmt = gsi_stmt (gsi);
-         bool no_next = false;
-
-         if (!is_gimple_call (stmt))
-           {
-             gsi_next (&gsi);
-             continue;
-           }
-
-         if (gimple_call_internal_p (stmt))
-           {
-             enum internal_fn ifn = gimple_call_internal_fn (stmt);
-             switch (ifn)
-               {
-               case IFN_UBSAN_NULL:
-                 no_next = ubsan_expand_null_ifn (&gsi);
-                 break;
-               case IFN_UBSAN_BOUNDS:
-                 no_next = ubsan_expand_bounds_ifn (&gsi);
-                 break;
-               case IFN_UBSAN_OBJECT_SIZE:
-                 no_next = ubsan_expand_objsize_ifn (&gsi);
-                 break;
-               case IFN_ASAN_CHECK:
-                 no_next = asan_expand_check_ifn (&gsi, use_calls);
-                 break;
-               default:
-                 break;
-               }
-           }
-
-         if (dump_file && (dump_flags & TDF_DETAILS))
-           {
-             fprintf (dump_file, "Optimized\n  ");
-             print_gimple_stmt (dump_file, stmt, 0, dump_flags);
-             fprintf (dump_file, "\n");
-           }
-
-         if (!no_next)
-           gsi_next (&gsi);
-       }
-    }
-  return 0;
-}
-
-} // anon namespace
-
-gimple_opt_pass *
-make_pass_sanopt (gcc::context *ctxt)
-{
-  return new pass_sanopt (ctxt);
-}
-
 #include "gt-asan.h"
diff --git gcc/asan.h gcc/asan.h
index 8e3f0ba..f448391 100644
--- gcc/asan.h
+++ gcc/asan.h
@@ -28,6 +28,7 @@ extern rtx_insn *asan_emit_stack_protection (rtx, rtx, 
unsigned int,
 extern bool asan_protect_global (tree);
 extern void initialize_sanitizer_builtins (void);
 extern tree asan_dynamic_init_call (bool);
+extern bool asan_expand_check_ifn (gimple_stmt_iterator *, bool);
 
 extern gimple_stmt_iterator create_cond_insert_point
      (gimple_stmt_iterator *, bool, bool, bool, basic_block *, basic_block *);
diff --git gcc/sanopt.c gcc/sanopt.c
index e69de29..b8d6183 100644
--- gcc/sanopt.c
+++ gcc/sanopt.c
@@ -0,0 +1,316 @@
+/* Optimize and expand sanitizer functions.
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   Contributed by Marek Polacek <pola...@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "hash-table.h"
+#include "predict.h"
+#include "vec.h"
+#include "hashtab.h"
+#include "hash-set.h"
+#include "machmode.h"
+#include "tm.h"
+#include "hard-reg-set.h"
+#include "input.h"
+#include "function.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "basic-block.h"
+#include "tree-ssa-alias.h"
+#include "internal-fn.h"
+#include "gimple-expr.h"
+#include "is-a.h"
+#include "inchash.h"
+#include "gimple.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "calls.h"
+#include "varasm.h"
+#include "stor-layout.h"
+#include "hash-map.h"
+#include "plugin-api.h"
+#include "ipa-ref.h"
+#include "cgraph.h"
+#include "stringpool.h"
+#include "tree-ssanames.h"
+#include "tree-pass.h"
+#include "asan.h"
+#include "gimple-pretty-print.h"
+#include "target.h"
+#include "expr.h"
+#include "output.h"
+#include "tm_p.h"
+#include "langhooks.h"
+#include "ubsan.h"
+#include "params.h"
+
+
+/* This is used to carry information about basic blocks.  It is
+   attached to the AUX field of the standard CFG block.  */
+
+struct sanopt_info
+{
+  /* True if this BB has been visited.  */
+  bool visited_p;
+};
+
+
+/* Try to optimize away redundant UBSAN_NULL checks.
+   
+   We walk blocks in the CFG via a depth first search of the dominator
+   tree; we push unique UBSAN_NULL statements into a vector in the
+   NULL_CHECK_MAP as we enter the blocks.  When leaving a block, we
+   mark the block as visited; then when checking the statements in the
+   vector, we ignore statements that are coming from already visited
+   blocks, because these cannot dominate anything anymore.  */
+
+static void
+sanopt_optimize_walker (basic_block bb,
+                       hash_map<tree, auto_vec<gimple> > *map)
+{
+  basic_block son;
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
+    {
+      gimple stmt = gsi_stmt (gsi);
+      bool remove = false;
+
+      if (is_gimple_call (stmt)
+         && gimple_call_internal_p (stmt))
+       switch (gimple_call_internal_fn (stmt))
+         {
+         case IFN_UBSAN_NULL:
+           {
+             gcc_assert (gimple_call_num_args (stmt) == 3);
+             tree ptr = gimple_call_arg (stmt, 0);
+             tree cur_align = gimple_call_arg (stmt, 2);
+             gcc_assert (TREE_CODE (cur_align) == INTEGER_CST);
+
+             auto_vec<gimple> &v = map->get_or_insert (ptr);
+             if (v.is_empty ())
+               /* For this PTR we don't have any UBSAN_NULL stmts
+                  recorded, so there's nothing to optimize yet.  */
+               v.safe_push (stmt);
+             else
+               {
+                 /* We already have recorded a UBSAN_NULL check
+                    for this pointer.  Perhaps we can drop this one.
+                    But only if this check doesn't specify stricter
+                    alignment.  */
+                 int i;
+                 gimple g;
+
+                 FOR_EACH_VEC_ELT (v, i, g)
+                   {
+                     /* Remove statements for BBs that have been
+                        already processed.  */
+                     sanopt_info *si = (sanopt_info *) gimple_bb (g)->aux;
+                     if (si->visited_p)
+                       {
+                         /* ??? This might be unneccesary; we could just
+                            skip the stale statements.  */
+                         v.unordered_remove (i);
+                         continue;
+                       }
+                     tree align = gimple_call_arg (g, 2);
+                     if (tree_int_cst_le (cur_align, align))
+                       {
+                         remove = true;
+                         break;
+                       }
+                   }
+
+                 if (remove)
+                   {
+                     /* Drop this check.  */
+                     if (dump_file && (dump_flags & TDF_DETAILS))
+                       {
+                         fprintf (dump_file, "Optimizing out\n  ");
+                         print_gimple_stmt (dump_file, stmt, 0,
+                                            dump_flags);
+                         fprintf (dump_file, "\n");
+                       }
+                     gsi_remove (&gsi, true);
+                   }
+                 else if (v.length () < 30)
+                   v.safe_push (stmt);
+                 }
+           }
+         default:
+           break;
+         }
+
+      /* If we were able to remove the current statement, gsi_remove
+        already pointed us to the next statement.  */
+      if (!remove)
+       gsi_next (&gsi);
+    }
+
+  for (son = first_dom_son (CDI_DOMINATORS, bb);
+       son;
+       son = next_dom_son (CDI_DOMINATORS, son))
+    sanopt_optimize_walker (son, map);
+
+  /* We're leaving this BB, so mark it to that effect.  */
+  sanopt_info *info = (sanopt_info *) bb->aux;
+  info->visited_p = true;
+}
+
+/* Try to remove redundant sanitizer checks in function FUN.  */
+
+static void
+sanopt_optimize (function *fun)
+{
+  /* This map maps a pointer (the first argument of UBSAN_NULL) to
+     a vector of UBSAN_NULL call statements that check this pointer.  */
+  hash_map<tree, auto_vec<gimple> > null_check_map;
+
+  /* Set up block info for each basic block.  */
+  alloc_aux_for_blocks (sizeof (sanopt_info));
+
+  /* We're going to do a dominator walk, so ensure that we have
+     dominance information.  */
+  calculate_dominance_info (CDI_DOMINATORS);
+
+  /* Recursively walk the dominator tree optimizing away
+     redundant checks.  */
+  sanopt_optimize_walker (ENTRY_BLOCK_PTR_FOR_FN (fun), &null_check_map);
+
+  free_aux_for_blocks ();
+}
+
+/* Perform optimization of sanitize functions.  */
+
+namespace {
+
+const pass_data pass_data_sanopt =
+{
+  GIMPLE_PASS, /* type */
+  "sanopt", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  ( PROP_ssa | PROP_cfg | PROP_gimple_leh ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_sanopt : public gimple_opt_pass
+{
+public:
+  pass_sanopt (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_sanopt, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_sanitize; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_sanopt
+
+unsigned int
+pass_sanopt::execute (function *fun)
+{
+  basic_block bb;
+
+  /* Try to remove redundant checks.  */
+  if (optimize
+      && (flag_sanitize & (SANITIZE_NULL | SANITIZE_ALIGNMENT)))
+    sanopt_optimize (fun);
+
+  int asan_num_accesses = 0;
+  if (flag_sanitize & SANITIZE_ADDRESS)
+    {
+      gimple_stmt_iterator gsi;
+      FOR_EACH_BB_FN (bb, fun)
+       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+         {
+           gimple stmt = gsi_stmt (gsi);
+           if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)
+               && gimple_call_internal_fn (stmt) == IFN_ASAN_CHECK)
+             ++asan_num_accesses;
+         }
+    }
+
+  bool use_calls = ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD < INT_MAX
+    && asan_num_accesses >= ASAN_INSTRUMENTATION_WITH_CALL_THRESHOLD;
+
+  FOR_EACH_BB_FN (bb, fun)
+    {
+      gimple_stmt_iterator gsi;
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
+       {
+         gimple stmt = gsi_stmt (gsi);
+         bool no_next = false;
+
+         if (!is_gimple_call (stmt))
+           {
+             gsi_next (&gsi);
+             continue;
+           }
+
+         if (gimple_call_internal_p (stmt))
+           {
+             enum internal_fn ifn = gimple_call_internal_fn (stmt);
+             switch (ifn)
+               {
+               case IFN_UBSAN_NULL:
+                 no_next = ubsan_expand_null_ifn (&gsi);
+                 break;
+               case IFN_UBSAN_BOUNDS:
+                 no_next = ubsan_expand_bounds_ifn (&gsi);
+                 break;
+               case IFN_UBSAN_OBJECT_SIZE:
+                 no_next = ubsan_expand_objsize_ifn (&gsi);
+                 break;
+               case IFN_ASAN_CHECK:
+                 no_next = asan_expand_check_ifn (&gsi, use_calls);
+                 break;
+               default:
+                 break;
+               }
+           }
+
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (dump_file, "Expanded\n  ");
+             print_gimple_stmt (dump_file, stmt, 0, dump_flags);
+             fprintf (dump_file, "\n");
+           }
+
+         if (!no_next)
+           gsi_next (&gsi);
+       }
+    }
+  return 0;
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_sanopt (gcc::context *ctxt)
+{
+  return new pass_sanopt (ctxt);
+}
diff --git gcc/testsuite/c-c++-common/ubsan/align-2.c 
gcc/testsuite/c-c++-common/ubsan/align-2.c
index 071de8c..02a26e2 100644
--- gcc/testsuite/c-c++-common/ubsan/align-2.c
+++ gcc/testsuite/c-c++-common/ubsan/align-2.c
@@ -51,6 +51,4 @@ main ()
 /* { dg-output "\.c:(13|16):\[0-9]*: \[^\n\r]*store to misaligned address 
0x\[0-9a-fA-F]* for type 'int', which requires 4 byte alignment.*" } */
 /* { dg-output "\.c:23:\[0-9]*: \[^\n\r]*member access within misaligned 
address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte 
alignment.*" } */
 /* { dg-output "\.c:(29|30):\[0-9]*: \[^\n\r]*member access within misaligned 
address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte 
alignment.*" } */
-/* { dg-output "\.c:30:\[0-9]*: \[^\n\r]*member access within misaligned 
address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte 
alignment.*" } */
-/* { dg-output "\.c:31:\[0-9]*: \[^\n\r]*member access within misaligned 
address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] byte 
alignment.*" } */
 /* { dg-output "\.c:37:\[0-9]*: \[^\n\r]*load of misaligned address 
0x\[0-9a-fA-F]* for type 'long long int', which requires \[48] byte alignment" 
} */
diff --git gcc/testsuite/c-c++-common/ubsan/align-4.c 
gcc/testsuite/c-c++-common/ubsan/align-4.c
index 3252595..f010589 100644
--- gcc/testsuite/c-c++-common/ubsan/align-4.c
+++ gcc/testsuite/c-c++-common/ubsan/align-4.c
@@ -9,6 +9,4 @@
 /* { dg-output "\[^\n\r]*\.c:(13|16):\[0-9]*: \[^\n\r]*store to misaligned 
address 0x\[0-9a-fA-F]* for type 'int', which requires 4 byte alignment.*" } */
 /* { dg-output "\[^\n\r]*\.c:23:\[0-9]*: \[^\n\r]*member access within 
misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] 
byte alignment.*" } */
 /* { dg-output "\[^\n\r]*\.c:(29|30):\[0-9]*: \[^\n\r]*member access within 
misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] 
byte alignment.*" } */
-/* { dg-output "\[^\n\r]*\.c:30:\[0-9]*: \[^\n\r]*member access within 
misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] 
byte alignment.*" } */
-/* { dg-output "\[^\n\r]*\.c:31:\[0-9]*: \[^\n\r]*member access within 
misaligned address 0x\[0-9a-fA-F]* for type 'struct S', which requires \[48] 
byte alignment.*" } */
 /* { dg-output "\[^\n\r]*\.c:37:\[0-9]*: \[^\n\r]*load of misaligned address 
0x\[0-9a-fA-F]* for type 'long long int', which requires \[48] byte alignment" 
} */
diff --git gcc/testsuite/g++.dg/ubsan/null-1.C 
gcc/testsuite/g++.dg/ubsan/null-1.C
index e1524b1..83b3033 100644
--- gcc/testsuite/g++.dg/ubsan/null-1.C
+++ gcc/testsuite/g++.dg/ubsan/null-1.C
@@ -25,6 +25,4 @@ main (void)
 }
 
 // { dg-output "reference binding to null pointer of type 'int'(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*reference binding to null pointer of type 
'int'(\n|\r\n|\r)" }
 // { dg-output "\[^\n\r]*reference binding to null pointer of type 'const 
L'(\n|\r\n|\r)" }
-// { dg-output "\[^\n\r]*reference binding to null pointer of type 
'int'(\n|\r\n|\r)" }
diff --git gcc/testsuite/g++.dg/ubsan/null-2.C 
gcc/testsuite/g++.dg/ubsan/null-2.C
index 88f387e..0230c7c 100644
--- gcc/testsuite/g++.dg/ubsan/null-2.C
+++ gcc/testsuite/g++.dg/ubsan/null-2.C
@@ -35,5 +35,3 @@ main (void)
 
 // { dg-output "\.C:26:\[0-9]*:\[\^\n\r]*member call on null pointer of type 
'struct U'.*" }
 // { dg-output "\.C:29:\[0-9]*:\[\^\n\r]*member call on null pointer of type 
'struct V'.*" }
-// { dg-output "\.C:31:\[0-9]*:\[\^\n\r]*member call on null pointer of type 
'struct V'.*" }
-// { dg-output "\.C:33:\[0-9]*:\[\^\n\r]*member call on null pointer of type 
'struct V'" }

        Marek

Reply via email to