Hi!

ubsan, asan (both PR112709) and _BitInt lowering (PR113466) want to
insert some instrumentation or adjustment statements before some statement.
This unfortunately creates invalid IL if inserting before a returns_twice
call, because we require that such calls are the first statement in a basic
block and that we have an edge from the .ABNORMAL_DISPATCHER block to
the block containing the returns_twice call (in addition to other edge(s)).

The following patch adds helper functions for such insertions and uses it
for now in ubsan (I'll post a follow up which uses it in asan and will
work later on the _BitInt lowering PR).

In particular, if the bb with returns_twice call at the start has just
2 edges, one EDGE_ABNORMAL from .ABNORMAL_DISPATCHER and another
(non-EDGE_ABNORMAL/EDGE_EH) from some other bb, it just inserts the
statement or sequence on that other edge.
If the bb has more predecessor edges or the one not from
.ABNORMAL_DISPATCHER is e.g. an EH edge (this latter case likely shouldn't
happen, one would need labels or something like that), the patch splits the
block with returns_twice call such that there is just one edge next to
.ABNORMAL_DISPATCHER edge and adjusts PHIs as needed to make it happen.
The functions also replace uses of PHIs from the returns_twice bb with
the corresponding PHI arguments, because otherwise it would be invalid IL.

E.g. in ubsan/pr112709-2.c (qux) we have before the ubsan pass
  <bb 10> :
  # .MEM_5(ab) = PHI <.MEM_4(9), .MEM_25(ab)(11)>
  # _7(ab) = PHI <_20(9), _8(ab)(11)>
  # .MEM_21(ab) = VDEF <.MEM_5(ab)>
  _22 = bar (*_7(ab));
where bar is returns_twice call and bb 11 has .ABNORMAL_DISPATCHER call,
this patch instruments it like:
  <bb 9> :
  # .MEM_4 = PHI <.MEM_17(ab)(4), .MEM_10(D)(5), .MEM_14(ab)(8)>
  # DEBUG BEGIN_STMT
  # VUSE <.MEM_4>
  _20 = p;
  # .MEM_27 = VDEF <.MEM_4>
  .UBSAN_NULL (_20, 0B, 0);
  # VUSE <.MEM_27>
  _2 = __builtin_dynamic_object_size (_20, 0);
  # .MEM_28 = VDEF <.MEM_27>
  .UBSAN_OBJECT_SIZE (_20, 1024, _2, 0);
  
  <bb 10> :
  # .MEM_5(ab) = PHI <.MEM_28(9), .MEM_25(ab)(11)>
  # _7(ab) = PHI <_20(9), _8(ab)(11)>
  # .MEM_21(ab) = VDEF <.MEM_5(ab)>
  _22 = bar (*_7(ab));
The edge from .ABNORMAL_DISPATCHER is there just to represent the
returning for 2nd and later times, the instrumentation can't be
done at that point as there is no code executed during that point.
The ubsan/pr112709-1.c testcase includes non-virtual PHIs to cover
the handling of those as well.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2024-03-12  Jakub Jelinek  <ja...@redhat.com>

        PR sanitizer/112709
        * gimple-iterator.h (gsi_insert_before_returns_twice_call,
        gsi_insert_seq_before_returns_twice_call): Declare.
        * gimple-iterator.cc: Include gimplify.h.
        (edge_before_returns_twice_call, gsi_insert_before_returns_twice_call,
        gsi_insert_seq_before_returns_twice_call): New functions.
        * ubsan.cc (ubsan_insert_before, ubsan_insert_seq_before): New
        functions.
        (instrument_mem_ref, instrument_pointer_overflow,
        instrument_nonnull_arg, instrument_nonnull_return): Use
        ubsan_insert_before instead of gsi_insert_before.
        (maybe_instrument_pointer_overflow): Use force_gimple_operand,
        gimple_seq_add_seq_without_update and ubsan_insert_seq_before
        instead of force_gimple_operand_gsi.
        (instrument_object_size): Likewise.  Use ubsan_insert_before instead
        of gsi_insert_before.

        * gcc.dg/ubsan/pr112709-1.c: New test.
        * gcc.dg/ubsan/pr112709-2.c: New test.

--- gcc/gimple-iterator.h.jj    2024-02-08 11:17:37.144661383 +0100
+++ gcc/gimple-iterator.h       2024-03-11 16:16:24.670464737 +0100
@@ -93,6 +93,10 @@ extern void gsi_insert_on_edge (edge, gi
 extern void gsi_insert_seq_on_edge (edge, gimple_seq);
 extern basic_block gsi_insert_on_edge_immediate (edge, gimple *);
 extern basic_block gsi_insert_seq_on_edge_immediate (edge, gimple_seq);
+extern basic_block gsi_insert_before_returns_twice_call (basic_block,
+                                                        gimple *);
+extern basic_block gsi_insert_seq_before_returns_twice_call (basic_block,
+                                                            gimple_seq);
 extern void gsi_commit_edge_inserts (void);
 extern void gsi_commit_one_edge_insert (edge, basic_block *);
 extern gphi_iterator gsi_start_phis (basic_block);
--- gcc/gimple-iterator.cc.jj   2024-02-08 11:17:37.144661383 +0100
+++ gcc/gimple-iterator.cc      2024-03-11 18:17:08.994804808 +0100
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3.
 #include "tree-cfg.h"
 #include "tree-ssa.h"
 #include "value-prof.h"
+#include "gimplify.h"
 
 
 /* Mark the statement STMT as modified, and update it.  */
@@ -944,3 +945,135 @@ gsi_start_phis (basic_block bb)
 
   return i;
 }
+
+/* Helper function for gsi_insert_before_returns_twice_call and
+   gsi_insert_seq_before_returns_twice_call.  Find edge to
+   insert statements before returns_twice call at the start of BB,
+   if there isn't just one, split the bb and adjust PHIs to ensure
+   that.  */
+
+static edge
+edge_before_returns_twice_call (basic_block bb)
+{
+  gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
+  gcc_checking_assert (is_gimple_call (gsi_stmt (gsi))
+                      && (gimple_call_flags (gsi_stmt (gsi))
+                          & ECF_RETURNS_TWICE) != 0);
+  edge_iterator ei;
+  edge e, ad_edge = NULL, other_edge = NULL;
+  bool split = false;
+  /* Look for a fallthru and possibly a single backedge.  */
+  FOR_EACH_EDGE (e, ei, bb->preds)
+    {
+      if ((e->flags & (EDGE_ABNORMAL | EDGE_EH)) == EDGE_ABNORMAL)
+       {
+         gimple_stmt_iterator gsi
+           = gsi_start_nondebug_after_labels_bb (e->src);
+         gimple *ad = gsi_stmt (gsi);
+         if (ad && gimple_call_internal_p (ad, IFN_ABNORMAL_DISPATCHER))
+           {
+             gcc_checking_assert (ad_edge == NULL);
+             ad_edge = e;
+             continue;
+           }
+       }
+      if (other_edge || e->flags & (EDGE_ABNORMAL | EDGE_EH))
+       split = true;
+      other_edge = e;
+    }
+  gcc_checking_assert (ad_edge);
+  if (other_edge == NULL)
+    split = true;
+  if (split)
+    {
+      other_edge = split_block_after_labels (bb);
+      e = make_edge (ad_edge->src, other_edge->dest, EDGE_ABNORMAL);
+      for (gphi_iterator gsi = gsi_start_phis (other_edge->src);
+          !gsi_end_p (gsi); gsi_next (&gsi))
+       {
+         gphi *phi = gsi.phi ();
+         tree lhs = gimple_phi_result (phi);
+         tree new_lhs;
+         if (virtual_operand_p (lhs))
+           new_lhs = make_ssa_name (SSA_NAME_VAR (lhs));
+         else
+           new_lhs = make_ssa_name (TREE_TYPE (lhs));
+         gimple_phi_set_result (phi, new_lhs);
+         gphi *new_phi = create_phi_node (lhs, other_edge->dest);
+         add_phi_arg (new_phi, new_lhs, other_edge, UNKNOWN_LOCATION);
+         add_phi_arg (new_phi, gimple_phi_arg_def_from_edge (phi, ad_edge),
+                      e, gimple_phi_arg_location_from_edge (phi, ad_edge));
+       }
+      remove_edge (ad_edge);
+    }
+  return other_edge;
+}
+
+/* Insert G before BB which starts with an ECF_RETURNS_TWICE call.
+   If BB has a single predecessor edge except for edge from
+   .ABNORMAL_DISPATCHER, insert on that edge, otherwise split
+   BB before the call, adjust PHIs and insert into the new BB.
+   If G refers to any results of BB's PHIs, replace them afterwards
+   with corresponding PHI arg.  */
+
+basic_block
+gsi_insert_before_returns_twice_call (basic_block bb, gimple *g)
+{
+  edge e = edge_before_returns_twice_call (bb);
+  basic_block new_bb = gsi_insert_on_edge_immediate (e, g);
+  use_operand_p use_p;
+  ssa_op_iter iter;
+  bool m = false;
+  FOR_EACH_SSA_USE_OPERAND (use_p, g, iter, SSA_OP_USE)
+    {
+      tree s = USE_FROM_PTR (use_p);
+      if (SSA_NAME_DEF_STMT (s)
+         && gimple_code (SSA_NAME_DEF_STMT (s)) == GIMPLE_PHI
+         && gimple_bb (SSA_NAME_DEF_STMT (s)) == e->dest)
+       {
+         tree r = gimple_phi_arg_def_from_edge (SSA_NAME_DEF_STMT (s), e);
+         SET_USE (use_p, unshare_expr (r));
+         m = true;
+       }
+    }
+  if (m)
+    update_stmt (g);
+  return new_bb;
+}
+
+/* Similiarly for sequence SEQ.  */
+
+basic_block
+gsi_insert_seq_before_returns_twice_call (basic_block bb, gimple_seq seq)
+{
+  if (gimple_seq_empty_p (seq))
+    return NULL;
+  gimple *f = gimple_seq_first_stmt (seq);
+  gimple *l = gimple_seq_last_stmt (seq);
+  edge e = edge_before_returns_twice_call (bb);
+  basic_block new_bb = gsi_insert_seq_on_edge_immediate (e, seq);
+  use_operand_p use_p;
+  ssa_op_iter iter;
+  for (gimple_stmt_iterator gsi = gsi_for_stmt (f); ; gsi_next (&gsi))
+    {
+      gimple *g = gsi_stmt (gsi);
+      bool m = false;
+      FOR_EACH_SSA_USE_OPERAND (use_p, g, iter, SSA_OP_USE)
+       {
+         tree s = USE_FROM_PTR (use_p);
+         if (SSA_NAME_DEF_STMT (s)
+             && gimple_code (SSA_NAME_DEF_STMT (s)) == GIMPLE_PHI
+             && gimple_bb (SSA_NAME_DEF_STMT (s)) == e->dest)
+           {
+             tree r = gimple_phi_arg_def_from_edge (SSA_NAME_DEF_STMT (s), e);
+             SET_USE (use_p, unshare_expr (r));
+             m = true;
+           }
+       }
+      if (m)
+       update_stmt (g);
+      if (g == l)
+       break;
+    }
+  return new_bb;
+}
--- gcc/ubsan.cc.jj     2024-01-03 11:51:32.312720350 +0100
+++ gcc/ubsan.cc        2024-03-11 17:07:58.104243341 +0100
@@ -1432,6 +1432,37 @@ ubsan_expand_vptr_ifn (gimple_stmt_itera
   return true;
 }
 
+/* Insert G stmt before ITER.  If ITER is a returns_twice call,
+   insert it on an appropriate edge instead.  */
+
+static void
+ubsan_insert_before (gimple_stmt_iterator *iter, gimple *g)
+{
+  gimple *stmt = gsi_stmt (*iter);
+  if (stmt
+      && is_gimple_call (stmt)
+      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
+    gsi_insert_before_returns_twice_call (gsi_bb (*iter), g);
+  else
+    gsi_insert_before (iter, g, GSI_SAME_STMT);
+}
+
+/* Similarly for sequence SEQ.  */
+
+static void
+ubsan_insert_seq_before (gimple_stmt_iterator *iter, gimple_seq seq)
+{
+  if (gimple_seq_empty_p (seq))
+    return;
+  gimple *stmt = gsi_stmt (*iter);
+  if (stmt
+      && is_gimple_call (stmt)
+      && (gimple_call_flags (stmt) & ECF_RETURNS_TWICE) != 0)
+    gsi_insert_seq_before_returns_twice_call (gsi_bb (*iter), seq);
+  else
+    gsi_insert_seq_before (iter, seq, GSI_SAME_STMT);
+}
+
 /* Instrument a memory reference.  BASE is the base of MEM, IS_LHS says
    whether the pointer is on the left hand side of the assignment.  */
 
@@ -1458,7 +1489,7 @@ instrument_mem_ref (tree mem, tree base,
   tree alignt = build_int_cst (pointer_sized_int_node, align);
   gcall *g = gimple_build_call_internal (IFN_UBSAN_NULL, 3, t, kind, alignt);
   gimple_set_location (g, gimple_location (gsi_stmt (*iter)));
-  gsi_insert_before (iter, g, GSI_SAME_STMT);
+  ubsan_insert_before (iter, g);
 }
 
 /* Perform the pointer instrumentation.  */
@@ -1485,7 +1516,7 @@ instrument_pointer_overflow (gimple_stmt
     return;
   gcall *g = gimple_build_call_internal (IFN_UBSAN_PTR, 2, ptr, off);
   gimple_set_location (g, gimple_location (gsi_stmt (*gsi)));
-  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  ubsan_insert_before (gsi, g);
 }
 
 /* Instrument pointer arithmetics if any.  */
@@ -1577,10 +1608,11 @@ maybe_instrument_pointer_overflow (gimpl
       else
        t = fold_convert (sizetype, moff);
     }
-  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, true,
-                               GSI_SAME_STMT);
-  base_addr = force_gimple_operand_gsi (gsi, base_addr, true, NULL_TREE, true,
-                                       GSI_SAME_STMT);
+  gimple_seq seq, this_seq;
+  t = force_gimple_operand (t, &seq, true, NULL_TREE);
+  base_addr = force_gimple_operand (base_addr, &this_seq, true, NULL_TREE);
+  gimple_seq_add_seq_without_update (&seq, this_seq);
+  ubsan_insert_seq_before (gsi, seq);
   instrument_pointer_overflow (gsi, base_addr, t);
 }
 
@@ -2035,7 +2067,7 @@ instrument_nonnull_arg (gimple_stmt_iter
            {
              g = gimple_build_assign (make_ssa_name (TREE_TYPE (arg)), arg);
              gimple_set_location (g, loc[0]);
-             gsi_insert_before (gsi, g, GSI_SAME_STMT);
+             ubsan_insert_before (gsi, g);
              arg = gimple_assign_lhs (g);
            }
 
@@ -2068,7 +2100,7 @@ instrument_nonnull_arg (gimple_stmt_iter
              g = gimple_build_call (fn, 1, data);
            }
          gimple_set_location (g, loc[0]);
-         gsi_insert_before (gsi, g, GSI_SAME_STMT);
+         ubsan_insert_before (gsi, g);
          ubsan_create_edge (g);
        }
       *gsi = gsi_for_stmt (stmt);
@@ -2124,7 +2156,7 @@ instrument_nonnull_return (gimple_stmt_i
          g = gimple_build_call (fn, 2, data, data2);
        }
       gimple_set_location (g, loc[0]);
-      gsi_insert_before (gsi, g, GSI_SAME_STMT);
+      ubsan_insert_before (gsi, g);
       ubsan_create_edge (g);
       *gsi = gsi_for_stmt (stmt);
     }
@@ -2231,6 +2263,7 @@ instrument_object_size (gimple_stmt_iter
   tree sizet;
   tree base_addr = base;
   gimple *bos_stmt = NULL;
+  gimple_seq seq = NULL;
   if (decl_p)
     base_addr = build1 (ADDR_EXPR,
                        build_pointer_type (TREE_TYPE (base)), base);
@@ -2244,19 +2277,12 @@ instrument_object_size (gimple_stmt_iter
       sizet = builtin_decl_explicit (BUILT_IN_DYNAMIC_OBJECT_SIZE);
       sizet = build_call_expr_loc (loc, sizet, 2, base_addr,
                                   integer_zero_node);
-      sizet = force_gimple_operand_gsi (gsi, sizet, false, NULL_TREE, true,
-                                       GSI_SAME_STMT);
+      sizet = force_gimple_operand (sizet, &seq, false, NULL_TREE);
       /* If the call above didn't end up being an integer constant, go one
         statement back and get the __builtin_object_size stmt.  Save it,
         we might need it later.  */
       if (SSA_VAR_P (sizet))
-       {
-         gsi_prev (gsi);
-         bos_stmt = gsi_stmt (*gsi);
-
-         /* Move on to where we were.  */
-         gsi_next (gsi);
-       }
+       bos_stmt = gsi_stmt (gsi_last (seq));
     }
   else
     return;
@@ -2298,21 +2324,24 @@ instrument_object_size (gimple_stmt_iter
       && !TREE_ADDRESSABLE (base))
     mark_addressable (base);
 
+  /* We have to emit the check.  */
+  gimple_seq this_seq;
+  t = force_gimple_operand (t, &this_seq, true, NULL_TREE);
+  gimple_seq_add_seq_without_update (&seq, this_seq);
+  ptr = force_gimple_operand (ptr, &this_seq, true, NULL_TREE);
+  gimple_seq_add_seq_without_update (&seq, this_seq);
+  ubsan_insert_seq_before (gsi, seq);
+
   if (bos_stmt
       && gimple_call_builtin_p (bos_stmt, BUILT_IN_DYNAMIC_OBJECT_SIZE))
     ubsan_create_edge (bos_stmt);
 
-  /* We have to emit the check.  */
-  t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, true,
-                               GSI_SAME_STMT);
-  ptr = force_gimple_operand_gsi (gsi, ptr, true, NULL_TREE, true,
-                                 GSI_SAME_STMT);
   tree ckind = build_int_cst (unsigned_char_type_node,
                              is_lhs ? UBSAN_STORE_OF : UBSAN_LOAD_OF);
   gimple *g = gimple_build_call_internal (IFN_UBSAN_OBJECT_SIZE, 4,
                                         ptr, t, sizet, ckind);
   gimple_set_location (g, loc);
-  gsi_insert_before (gsi, g, GSI_SAME_STMT);
+  ubsan_insert_before (gsi, g);
 }
 
 /* Instrument values passed to builtin functions.  */
--- gcc/testsuite/gcc.dg/ubsan/pr112709-1.c.jj  2024-03-11 16:55:09.038760951 
+0100
+++ gcc/testsuite/gcc.dg/ubsan/pr112709-1.c     2024-03-11 16:55:02.159854950 
+0100
@@ -0,0 +1,52 @@
+/* PR sanitizer/112709 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=undefined -O2" } */
+
+struct S { char c[1024]; };
+int foo (int);
+
+__attribute__((returns_twice, noipa)) struct S
+bar (int x)
+{
+  (void) x;
+  struct S s = {};
+  s.c[42] = 42;
+  return s;
+}
+
+void
+baz (struct S *p)
+{
+  foo (1);
+  *p = bar (0);
+}
+
+void
+qux (int x, struct S *p)
+{
+  if (x == 25)
+    x = foo (2);
+  else if (x == 42)
+    x = foo (foo (3));
+  *p = bar (x);
+}
+
+void
+corge (int x, struct S *p)
+{
+  void *q[] = { &&l1, &&l2, &&l3, &&l3 };
+  if (x == 25)
+    {
+    l1:
+      x = foo (2);
+    }
+  else if (x == 42)
+    {
+    l2:
+      x = foo (foo (3));
+    }
+l3:
+  *p = bar (x);
+  if (x < 4)
+    goto *q[x & 3];
+}
--- gcc/testsuite/gcc.dg/ubsan/pr112709-2.c.jj  2024-03-11 16:55:37.000378840 
+0100
+++ gcc/testsuite/gcc.dg/ubsan/pr112709-2.c     2024-03-11 17:13:37.517599492 
+0100
@@ -0,0 +1,50 @@
+/* PR sanitizer/112709 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=undefined -O2" } */
+
+struct S { char c[1024]; } *p;
+int foo (int);
+
+__attribute__((returns_twice, noipa)) int
+bar (struct S x)
+{
+  (void) x.c[0];
+  return 0;
+}
+
+void
+baz (int *y)
+{
+  foo (1);
+  *y = bar (*p);
+}
+
+void
+qux (int x, int *y)
+{
+  if (x == 25)
+    x = foo (2);
+  else if (x == 42)
+    x = foo (foo (3));
+  *y = bar (*p);
+}
+
+void
+corge (int x, int *y)
+{
+  void *q[] = { &&l1, &&l2, &&l3, &&l3 };
+  if (x == 25)
+    {
+    l1:
+      x = foo (2);
+    }
+  else if (x == 42)
+    {
+    l2:
+      x = foo (foo (3));
+    }
+l3:
+  *y = bar (*p);
+  if (x < 4)
+    goto *q[x & 3];
+}

        Jakub

Reply via email to