On Fri, Aug 28, 2020 at 08:47:06AM +0200, Richard Biener via Gcc-patches wrote:
> IIRC elsewhere it was discussed to use ld to perform merging by
> emitting separate rodata sections for constant sizes (4, 8, 16, 32, 64
> byte sizes).

ld does that already, and gcc too.

> ld could always direct 8 byte constant refs to the larger pools (sub-)entry.

But there is no way to express in ELF that something like that would be
acceptable.

I meant something like the following patch (below), which on e.g. a dumb:

typedef float V __attribute__((vector_size (4 * sizeof (float))));

/* Testcase: adds a vector constant to p[0] and adjusts four elements of q
   with scalar constants.  Each scalar used (4.0f, 3.0f, 2.0f, 1.0f) has the
   same value as one element of the vector constant
   { 1.0f, 2.0f, 3.0f, 4.0f }.  */
void
foo (V *p, float *q)
{
  p[0] += (V) { 1.0f, 2.0f, 3.0f, 4.0f };
  q[0] += 4.0f;
  q[1] -= 3.0f;
  q[17] -= 2.0f;
  q[31] += 1.0f;
}

testcase merges all 4 of the scalar constant pool entries into the
CONST_VECTOR one.

I'm punting on section anchors and not doing it in the per-function (i.e.
non-shared) constant pools simply because I don't know them well enough and
don't know whether backends use the offsets for something, etc.
For section anchors, I guess it would need to be done before (re)computing the
offsets, and it would need to arrange for the desc->mark < 0 entries not to be
considered as objects in the object block; for non-shared pools, perhaps it
would be enough to call the new function (optimize_constant_pool) from
output_constant_pool before calling recompute_pool_offsets, and to adjust
recompute_pool_offsets to ignore desc->mark < 0.

2020-08-28  Jakub Jelinek  <ja...@redhat.com>

        * varasm.c (output_constant_pool_contents): Emit desc->mark < 0
        entries as aliases.
        (optimize_constant_pool): New function.
        (output_shared_constant_pool): Call it if TARGET_SUPPORTS_ALIASES.

--- gcc/varasm.c.jj     2020-07-28 15:39:10.091755086 +0200
+++ gcc/varasm.c        2020-08-28 10:38:10.207636849 +0200
@@ -4198,7 +4198,27 @@ output_constant_pool_contents (struct rt
   class constant_descriptor_rtx *desc;
 
   for (desc = pool->first; desc ; desc = desc->next)
-    if (desc->mark)
+    if (desc->mark < 0)
+      {
+#ifdef ASM_OUTPUT_DEF
+        const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0));
+        char label[256];
+        char buffer[256 + 32];
+        const char *p;
+
+        ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark);
+       p = targetm.strip_name_encoding (label);
+       if (desc->offset)
+         {
+           sprintf (buffer, "%s+%ld", p, (long) (desc->offset));
+           p = buffer;
+         }
+       ASM_OUTPUT_DEF (asm_out_file, name, p);
+#else
+       gcc_unreachable ();
+#endif
+      }
+    else if (desc->mark)
       {
        /* If the constant is part of an object_block, make sure that
           the constant has been positioned within its block, but do not
@@ -4216,6 +4236,52 @@ output_constant_pool_contents (struct rt
       }
 }
 
+/* Attempt to optimize constant pool POOL.  If it contains both CONST_VECTOR
+   constants and scalar constants with the values of CONST_VECTOR elements,
+   try to alias the scalar constants with the CONST_VECTOR elements.  */
+
+static void
+optimize_constant_pool (struct rtx_constant_pool *pool)
+{
+  for (constant_descriptor_rtx *desc = pool->first; desc; desc = desc->next)
+    if (desc->mark > 0
+       && GET_CODE (desc->constant) == CONST_VECTOR
+       && VECTOR_MODE_P (desc->mode)
+       && GET_MODE_CLASS (desc->mode) != MODE_VECTOR_BOOL
+       && !(SYMBOL_REF_HAS_BLOCK_INFO_P (desc->sym)
+            && SYMBOL_REF_BLOCK (desc->sym))
+       && desc->labelno >= 0)
+      {
+       scalar_mode submode = GET_MODE_INNER (desc->mode);
+       unsigned int subalign = MIN (desc->align, GET_MODE_BITSIZE (submode));
+       int units = GET_MODE_NUNITS (desc->mode);
+
+       for (int i = 0; i < units; i++)
+         {
+           if (i != 0
+               && rtx_equal_p (CONST_VECTOR_ELT (desc->constant, i),
+                               CONST_VECTOR_ELT (desc->constant, i - 1)))
+             continue;
+
+           constant_descriptor_rtx tmp;
+           tmp.constant = CONST_VECTOR_ELT (desc->constant, i);
+           tmp.mode = submode;
+           hashval_t hash = const_rtx_hash (tmp.constant);
+           constant_descriptor_rtx *eldesc
+             = pool->const_rtx_htab->find_with_hash (&tmp, hash);
+           if (eldesc
+               && eldesc->mark > 0
+               && eldesc->align <= subalign
+               && !(SYMBOL_REF_HAS_BLOCK_INFO_P (eldesc->sym)
+                    && SYMBOL_REF_BLOCK (eldesc->sym)))
+             {
+               eldesc->mark = ~desc->labelno;
+               eldesc->offset = i * GET_MODE_SIZE (submode);
+             }
+         }
+      }
+}
+
 /* Mark all constants that are used in the current function, then write
    out the function's private constant pool.  */
 
@@ -4251,6 +4317,9 @@ output_constant_pool (const char *fnname
 void
 output_shared_constant_pool (void)
 {
+  if (TARGET_SUPPORTS_ALIASES)
+    optimize_constant_pool (shared_constant_pool);
+
   output_constant_pool_contents (shared_constant_pool);
 }
 


        Jakub

Reply via email to