On Fri, Aug 28, 2020 at 06:25:46PM +0200, Jakub Jelinek via Gcc-patches wrote:
> You're right, thanks for spotting it, I've missed native_encode_rtx will do
> quick_push rather than safe_push.
> 
> Updated patch below, it shouldn't be needed in the second loop, because
> the first loop should already grow it to the largest size.

Testing beyond a bug in i386.md revealed also that I've lost a cast to long
to avoid breaking 32-bit bootstrap.

This is the version that passed bootstrap/regtest on both x86_64-linux and
i686-linux.  In both bootstraps/regtests together, it saved (from the
statistics I've gathered) 63104 .rodata bytes (before constant merging),
in 6814 hits of the data->desc->mark = ~(*slot)->desc->labelno;.

Ok for trunk?

2020-08-30  Jakub Jelinek  <ja...@redhat.com>

        PR middle-end/54201
        * varasm.c: Include alloc-pool.h.
        (output_constant_pool_contents): Emit desc->mark < 0 entries as
        aliases.
        (struct constant_descriptor_rtx_data): New type.
        (constant_descriptor_rtx_data_cmp): New function.
        (struct const_rtx_data_hasher): New type.
        (const_rtx_data_hasher::hash, const_rtx_data_hasher::equal): New
        methods.
        (optimize_constant_pool): New function.
        (output_shared_constant_pool): Call it if TARGET_SUPPORTS_ALIASES.

--- gcc/varasm.c.jj     2020-07-28 15:39:10.091755086 +0200
+++ gcc/varasm.c        2020-08-28 18:21:58.943759578 +0200
@@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.
 #include "asan.h"
 #include "rtl-iter.h"
 #include "file-prefix-map.h" /* remap_debug_filename()  */
+#include "alloc-pool.h"
 
 #ifdef XCOFF_DEBUGGING_INFO
 #include "xcoffout.h"          /* Needed for external data declarations.  */
@@ -4198,7 +4199,27 @@ output_constant_pool_contents (struct rt
   class constant_descriptor_rtx *desc;
 
   for (desc = pool->first; desc ; desc = desc->next)
-    if (desc->mark)
+    if (desc->mark < 0)
+      {
+#ifdef ASM_OUTPUT_DEF
+       const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0));
+       char label[256];
+       char buffer[256 + 32];
+       const char *p;
+
+       ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark);
+       p = targetm.strip_name_encoding (label);
+       if (desc->offset)
+         {
+           sprintf (buffer, "%s+%ld", p, (long) (desc->offset));
+           p = buffer;
+         }
+       ASM_OUTPUT_DEF (asm_out_file, name, p);
+#else
+       gcc_unreachable ();
+#endif
+      }
+    else if (desc->mark)
       {
        /* If the constant is part of an object_block, make sure that
           the constant has been positioned within its block, but do not
@@ -4216,6 +4237,160 @@ output_constant_pool_contents (struct rt
       }
 }
 
+struct constant_descriptor_rtx_data {
+  constant_descriptor_rtx *desc;
+  target_unit *bytes;
+  unsigned short size;
+  unsigned short offset;
+  unsigned int hash;
+};
+
+/* qsort callback to sort constant_descriptor_rtx_data * vector by
+   decreasing size.  */
+
+static int
+constant_descriptor_rtx_data_cmp (const void *p1, const void *p2)
+{
+  constant_descriptor_rtx_data *const data1
+    = *(constant_descriptor_rtx_data * const *) p1;
+  constant_descriptor_rtx_data *const data2
+    = *(constant_descriptor_rtx_data * const *) p2;
+  if (data1->size > data2->size)
+    return -1;
+  if (data1->size < data2->size)
+    return 1;
+  if (data1->hash < data2->hash)
+    return -1;
+  gcc_assert (data1->hash > data2->hash);
+  return 1;
+}
+
+struct const_rtx_data_hasher : nofree_ptr_hash<constant_descriptor_rtx_data>
+{
+  static hashval_t hash (constant_descriptor_rtx_data *);
+  static bool equal (constant_descriptor_rtx_data *,
+                    constant_descriptor_rtx_data *);
+};
+
+/* Hash and compare functions for const_rtx_data_htab.  */
+
+hashval_t
+const_rtx_data_hasher::hash (constant_descriptor_rtx_data *data)
+{
+  return data->hash;
+}
+
+bool
+const_rtx_data_hasher::equal (constant_descriptor_rtx_data *x,
+                             constant_descriptor_rtx_data *y)
+{
+  if (x->hash != y->hash || x->size != y->size)
+    return 0;
+  unsigned int align1 = x->desc->align;
+  unsigned int align2 = y->desc->align;
+  unsigned int offset1 = (x->offset * BITS_PER_UNIT) & (align1 - 1);
+  unsigned int offset2 = (y->offset * BITS_PER_UNIT) & (align2 - 1);
+  if (offset1)
+    align1 = least_bit_hwi (offset1);
+  if (offset2)
+    align2 = least_bit_hwi (offset2);
+  if (align2 > align1)
+    return 0;
+  if (memcmp (x->bytes, y->bytes, x->size * sizeof (target_unit)) != 0)
+    return 0;
+  return 1;
+}
+
+/* Attempt to optimize constant pool POOL.  If it contains both CONST_VECTOR
+   constants and scalar constants with the values of CONST_VECTOR elements,
+   try to alias the scalar constants with the CONST_VECTOR elements.  */
+
+static void
+optimize_constant_pool (struct rtx_constant_pool *pool)
+{
+  auto_vec<target_unit, 128> buffer;
+  auto_vec<constant_descriptor_rtx_data *, 128> vec;
+  object_allocator<constant_descriptor_rtx_data>
+    data_pool ("constant_descriptor_rtx_data_pool");
+  int idx = 0;
+  size_t size = 0;
+  for (constant_descriptor_rtx *desc = pool->first; desc; desc = desc->next)
+    if (desc->mark > 0
+       && ! (SYMBOL_REF_HAS_BLOCK_INFO_P (desc->sym)
+             && SYMBOL_REF_BLOCK (desc->sym)))
+      {
+       buffer.truncate (0);
+       buffer.reserve (GET_MODE_SIZE (desc->mode));
+       if (native_encode_rtx (desc->mode, desc->constant, buffer, 0,
+                              GET_MODE_SIZE (desc->mode)))
+         {
+           constant_descriptor_rtx_data *data = data_pool.allocate ();
+           data->desc = desc;
+           data->bytes = NULL;
+           data->size = GET_MODE_SIZE (desc->mode);
+           data->offset = 0;
+           data->hash = idx++;
+           size += data->size;
+           vec.safe_push (data);
+         }
+      }
+  if (idx)
+    {
+      vec.qsort (constant_descriptor_rtx_data_cmp);
+      unsigned min_size = vec.last ()->size;
+      target_unit *bytes = XNEWVEC (target_unit, size);
+      unsigned int i;
+      constant_descriptor_rtx_data *data;
+      hash_table<const_rtx_data_hasher> * htab
+       = new hash_table<const_rtx_data_hasher> (31);
+      size = 0;
+      FOR_EACH_VEC_ELT (vec, i, data)
+       {
+         buffer.truncate (0);
+         native_encode_rtx (data->desc->mode, data->desc->constant,
+                            buffer, 0, data->size);
+         memcpy (bytes + size, buffer.address (), data->size);
+         data->bytes = bytes + size;
+         data->hash = iterative_hash (data->bytes,
+                                      data->size * sizeof (target_unit), 0);
+         size += data->size;
+         constant_descriptor_rtx_data **slot
+           = htab->find_slot_with_hash (data, data->hash, INSERT);
+         if (*slot)
+           {
+             data->desc->mark = ~(*slot)->desc->labelno;
+             data->desc->offset = (*slot)->offset;
+           }
+         else
+           {
+             unsigned int sz = 1 << floor_log2 (data->size);
+
+             *slot = data;
+             for (sz >>= 1; sz >= min_size; sz >>= 1)
+               for (unsigned off = 0; off + sz <= data->size; off += sz)
+                 {
+                   constant_descriptor_rtx_data tmp;
+                   tmp.desc = data->desc;
+                   tmp.bytes = data->bytes + off;
+                   tmp.size = sz;
+                   tmp.offset = off;
+                   tmp.hash = iterative_hash (tmp.bytes,
+                                              sz * sizeof (target_unit), 0);
+                   slot = htab->find_slot_with_hash (&tmp, tmp.hash, INSERT);
+                   if (*slot == NULL)
+                     {
+                       *slot = data_pool.allocate ();
+                       **slot = tmp;
+                     }
+                 }
+           }
+       }
+      delete htab;
+      XDELETE (bytes);
+    }
+  data_pool.release ();
+}
+
 /* Mark all constants that are used in the current function, then write
    out the function's private constant pool.  */
 
@@ -4251,6 +4426,10 @@ output_constant_pool (const char *fnname
 void
 output_shared_constant_pool (void)
 {
+  if (optimize
+      && TARGET_SUPPORTS_ALIASES)
+    optimize_constant_pool (shared_constant_pool);
+
   output_constant_pool_contents (shared_constant_pool);
 }
 


        Jakub

Reply via email to