https://gcc.gnu.org/g:ebf9b1becc8cf76421f1741ac8084d139abd49db

commit ebf9b1becc8cf76421f1741ac8084d139abd49db
Author: Alexandre Oliva <ol...@adacore.com>
Date:   Wed Aug 14 21:59:28 2024 -0300

    Optimize initialization of small padded objects
    
    When small objects containing padding bits (or bytes) are fully
    initialized, we will often store them in registers, and setting
    bitfields and other small fields will attempt to preserve the
    uninitialized padding bits, which tends to be expensive.
    Zero-initializing registers, OTOH, tends to be cheap.
    
    So, if we're optimizing, zero-initialize such small padded objects
    even if that's not needed for correctness.  We can't zero-initialize
    all such padding objects, though: if there's no padding whatsoever,
    and all fields are initialized with nonzero, the zero initialization
    would be flagged as dead.  That's why we introduce machinery to detect
    whether objects have padding bits.  I considered distinguishing
    between bitfields, units and larger padding elements, but I didn't
    pursue that distinction.
    
    Since the object's zero-initialization subsumes fields'
    zero-initialization, the empty string test in builtin-snprintf-6.c's
    test_assign_aggregate would regress without the addition of
    native_encode_constructor.
    
    
    for  gcc/ChangeLog
    
            * expr.cc (categorize_ctor_elements_1): Change p_complete to
            int, to distinguish complete initialization in presence or
            absence of uninitialized padding bits.
            (categorize_ctor_elements): Likewise.  Adjust all callers...
            * expr.h (categorize_ctor_elements): ... and declaration.
            (type_has_padding_at_level_p): New.
            * gimple-fold.cc (type_has_padding_at_level_p): New.
            * fold-const.cc (native_encode_constructor): New.
            (native_encode_expr): Call it.
            * gimplify.cc (gimplify_init_constructor): Clear small
            non-addressable non-volatile objects with padding or
            other uninitialized fields as an optimization.

Diff:
---
 gcc/expr.cc        | 20 +++++++++++++-------
 gcc/expr.h         |  3 ++-
 gcc/fold-const.cc  | 33 +++++++++++++++++++++++++++++++++
 gcc/gimple-fold.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/gimplify.cc    | 14 +++++++++++++-
 5 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/gcc/expr.cc b/gcc/expr.cc
index 2089c2b86a9..a701c67b348 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7096,7 +7096,7 @@ count_type_elements (const_tree type, bool for_ctor_p)
 static bool
 categorize_ctor_elements_1 (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
                            HOST_WIDE_INT *p_unique_nz_elts,
-                           HOST_WIDE_INT *p_init_elts, bool *p_complete)
+                           HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   unsigned HOST_WIDE_INT idx;
   HOST_WIDE_INT nz_elts, unique_nz_elts, init_elts, num_fields;
@@ -7218,7 +7218,10 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 
   if (*p_complete && !complete_ctor_at_level_p (TREE_TYPE (ctor),
                                                num_fields, elt_type))
-    *p_complete = false;
+    *p_complete = 0;
+  else if (*p_complete > 0
+          && type_has_padding_at_level_p (TREE_TYPE (ctor)))
+    *p_complete = -1;
 
   *p_nz_elts += nz_elts;
   *p_unique_nz_elts += unique_nz_elts;
@@ -7239,7 +7242,10 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
      and place it in *P_ELT_COUNT.
    * whether the constructor is complete -- in the sense that every
      meaningful byte is explicitly given a value --
-     and place it in *P_COMPLETE.
+     and place it in *P_COMPLETE:
+     -  0 if any field is missing
+     -  1 if all fields are initialized, and there's no padding
+     - -1 if all fields are initialized, but there's padding
 
    Return whether or not CTOR is a valid static constant initializer, the same
    as "initializer_constant_valid_p (CTOR, TREE_TYPE (CTOR)) != 0".  */
@@ -7247,12 +7253,12 @@ categorize_ctor_elements_1 (const_tree ctor, 
HOST_WIDE_INT *p_nz_elts,
 bool
 categorize_ctor_elements (const_tree ctor, HOST_WIDE_INT *p_nz_elts,
                          HOST_WIDE_INT *p_unique_nz_elts,
-                         HOST_WIDE_INT *p_init_elts, bool *p_complete)
+                         HOST_WIDE_INT *p_init_elts, int *p_complete)
 {
   *p_nz_elts = 0;
   *p_unique_nz_elts = 0;
   *p_init_elts = 0;
-  *p_complete = true;
+  *p_complete = 1;
 
   return categorize_ctor_elements_1 (ctor, p_nz_elts, p_unique_nz_elts,
                                     p_init_elts, p_complete);
@@ -7313,7 +7319,7 @@ mostly_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
     {
       HOST_WIDE_INT nz_elts, unz_elts, init_elts;
-      bool complete_p;
+      int complete_p;
 
       categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
                                &complete_p);
@@ -7331,7 +7337,7 @@ all_zeros_p (const_tree exp)
   if (TREE_CODE (exp) == CONSTRUCTOR)
     {
       HOST_WIDE_INT nz_elts, unz_elts, init_elts;
-      bool complete_p;
+      int complete_p;
 
       categorize_ctor_elements (exp, &nz_elts, &unz_elts, &init_elts,
                                &complete_p);
diff --git a/gcc/expr.h b/gcc/expr.h
index 533ae0af387..04782b15f19 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -361,7 +361,8 @@ extern unsigned HOST_WIDE_INT highest_pow2_factor 
(const_tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
                                      HOST_WIDE_INT *, HOST_WIDE_INT *,
-                                     bool *);
+                                     int *);
+extern bool type_has_padding_at_level_p (tree);
 extern bool immediate_const_ctor_p (const_tree, unsigned int words = 1);
 extern void store_constructor (tree, rtx, int, poly_int64, bool);
 extern HOST_WIDE_INT int_expr_size (const_tree exp);
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 8908e7381e7..ff2c8f35303 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -8193,6 +8193,36 @@ native_encode_string (const_tree expr, unsigned char 
*ptr, int len, int off)
   return len;
 }
 
+/* Subroutine of native_encode_expr.  Encode the CONSTRUCTOR
+   specified by EXPR into the buffer PTR of length LEN bytes.
+   Return the number of bytes placed in the buffer, or zero
+   upon failure.  */
+
+static int
+native_encode_constructor (const_tree expr, unsigned char *ptr, int len, int 
off)
+{
+  /* We are only concerned with zero-initialization constructors here.  */
+  if (CONSTRUCTOR_NELTS (expr))
+    return 0;
+
+  /* Wide-char strings are encoded in target byte-order so native
+     encoding them is trivial.  */
+  if (BITS_PER_UNIT != CHAR_BIT
+      || !tree_fits_shwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))))
+    return 0;
+
+  HOST_WIDE_INT total_bytes = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
+  if ((off == -1 && total_bytes > len) || off >= total_bytes)
+    return 0;
+  if (off == -1)
+    off = 0;
+  len = MIN (total_bytes - off, len);
+  if (ptr == NULL)
+    /* Dry run.  */;
+  else
+    memset (ptr, 0, len);
+  return len;
+}
 
 /* Subroutine of fold_view_convert_expr.  Encode the INTEGER_CST, REAL_CST,
    FIXED_CST, COMPLEX_CST, STRING_CST, or VECTOR_CST specified by EXPR into
@@ -8229,6 +8259,9 @@ native_encode_expr (const_tree expr, unsigned char *ptr, 
int len, int off)
     case STRING_CST:
       return native_encode_string (expr, ptr, len, off);
 
+    case CONSTRUCTOR:
+      return native_encode_constructor (expr, ptr, len, off);
+
     default:
       return 0;
     }
diff --git a/gcc/gimple-fold.cc b/gcc/gimple-fold.cc
index 18d7a6b176d..f955a006ebe 100644
--- a/gcc/gimple-fold.cc
+++ b/gcc/gimple-fold.cc
@@ -4661,6 +4661,56 @@ clear_padding_type_may_have_padding_p (tree type)
     }
 }
 
+/* Return true if TYPE has padding bits aside from those in fields,
+   elements, etc.  */
+
+bool
+type_has_padding_at_level_p (tree type)
+{
+  switch (TREE_CODE (type))
+    {
+    case RECORD_TYPE:
+      {
+       tree bitpos = size_zero_node;
+       /* Expect fields to be sorted by bit position.  */
+       for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+         if (TREE_CODE (f) == FIELD_DECL)
+           {
+             if (DECL_PADDING_P (f))
+               return true;
+             tree pos = bit_position (f);
+             if (simple_cst_equal (bitpos, pos) != 1)
+               return true;
+             if (!DECL_SIZE (f))
+               return true;
+             bitpos = int_const_binop (PLUS_EXPR, pos, DECL_SIZE (f));
+           }
+       if (simple_cst_equal (bitpos, TYPE_SIZE (type)) != 1)
+         return true;
+       return false;
+      }
+    case UNION_TYPE:
+      /* If any of the fields is smaller than the whole, there is padding.  */
+      for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
+       if (TREE_CODE (f) == FIELD_DECL)
+         if (simple_cst_equal (TYPE_SIZE (TREE_TYPE (f)),
+                               TREE_TYPE (type)) != 1)
+           return true;
+      return false;
+    case ARRAY_TYPE:
+    case COMPLEX_TYPE:
+    case VECTOR_TYPE:
+      /* No recursing here, no padding at this level.  */
+      return false;
+    case REAL_TYPE:
+      return clear_padding_real_needs_padding_p (type);
+    case BITINT_TYPE:
+      return clear_padding_bitint_needs_padding_p (type);
+    default:
+      return false;
+    }
+}
+
 /* Emit a runtime loop:
    for (; buf.base != end; buf.base += sz)
      __builtin_clear_padding (buf.base);  */
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 26a216e151d..354a6f53a72 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -5564,7 +5564,8 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
*pre_p, gimple_seq *post_p,
        struct gimplify_init_ctor_preeval_data preeval_data;
        HOST_WIDE_INT num_ctor_elements, num_nonzero_elements;
        HOST_WIDE_INT num_unique_nonzero_elements;
-       bool complete_p, valid_const_initializer;
+       int complete_p;
+       bool valid_const_initializer;
 
        /* Aggregate types must lower constructors to initialization of
           individual elements.  The exception is that a CONSTRUCTOR node
@@ -5668,6 +5669,17 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
*pre_p, gimple_seq *post_p,
          /* If a single access to the target must be ensured and all elements
             are zero, then it's optimal to clear whatever their number.  */
          cleared = true;
+       /* If the object is small enough to go in registers, and it's
+          not required to be constructed in memory, clear it first.
+          That will avoid wasting cycles preserving any padding bits
+          that might be there, and if there aren't any, the compiler
+          is smart enough to optimize the clearing out.  */
+       else if (complete_p <= 0
+                && !TREE_ADDRESSABLE (ctor) && !TREE_THIS_VOLATILE (object)
+                && (TYPE_MODE (type) != BLKmode || TYPE_NO_FORCE_BLK (type))
+                && (opt_for_fn (cfun->decl, optimize)
+                    || opt_for_fn (cfun->decl, optimize_size)))
+         cleared = true;
        else
          cleared = false;

Reply via email to