Aside from the hashing and the splitting of insn generation from
recipe creation, there's no algorithmic change.
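
To make the recipe encoding concrete, here is a worked example
(illustration only, not part of the patch): for the DImode constant
0x1234ffff5678ffff, exactly two 16-bit quarters are 0xffff, so
exam_full takes the one_match == 2 path with first_not_ffff_match == 16
and records

    set0 (0x1234ffffffffffff);  /* code[0] = AA_GI_SET,  op[0] = val | mask */
    insN (16, val);             /* code[1] = AA_GI_INS1, op[1] = 0x5678     */

which generate () later replays as

    movn    x0, #0xedcb, lsl #48
    movk    x0, #0x5678, lsl #16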

Cc: Marcus Shawcroft <marcus.shawcr...@arm.com>
Cc: Richard Earnshaw <richard.earns...@arm.com>
---
        * config/aarch64/aarch64.c: Include genimm-hash.h.
        (aa_gi_code): New enum.
        (genimm_aa64): New class.
        (genimm_aa64::genimm_aa64): New.
        (genimm_aa64::set0, genimm_aa64::opN, genimm_aa64::insN): New.
        (genimm_aa64::exam_simple, genimm_aa64::exam_search): New.
        (genimm_aa64::exam_plus): New.
        (genimm_aa64::generate): New.
        (genimm_aa64::exam_full): Extract from the body of the
        old aarch64_internal_mov_immediate.
        (aarch64_internal_mov_immediate): Rewrite using genimm_hash.
---
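A note on the generate () change (sketch only; B and D are symbolic
placeholders): for a two-insn arithmetic recipe { AA_GI_SET B,
AA_GI_PLUS D }, the optimize && can_create_pseudo_p () path emits
the equivalent of

    rtx sub = gen_reg_rtx (mode);
    emit_insn (gen_rtx_SET (sub, GEN_INT (B)));
    emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, sub, GEN_INT (D))));

keeping the constant load of B exposed to CSE, as the old code arranged
via `subtargets'; without pseudos, both insns target DEST directly.
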
 gcc/config/aarch64/aarch64.c | 446 +++++++++++++++++++++++++------------------
 1 file changed, 256 insertions(+), 190 deletions(-)
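
As a usage sketch (hypothetical call sites, not part of this patch),
cost queries and expansion now share one recipe computation, which is
presumably cached by the genimm_hash table:

    /* Cost query: no insns emitted.  */
    int n = aarch64_internal_mov_immediate (NULL_RTX, imm, false, DImode);

    /* Expansion: replays the same recipe.  */
    aarch64_internal_mov_immediate (dest, imm, true, DImode);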

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 1394ed7..6b12a07 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -76,6 +76,7 @@
 #include "sched-int.h"
 #include "cortex-a57-fma-steering.h"
 #include "target-globals.h"
+#include "genimm-hash.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -1317,54 +1318,144 @@ aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
   return plus_constant (mode, reg, offset);
 }
 
-static int
-aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
-                               machine_mode mode)
+namespace {
+
+/* To simplify the encoding below, make sure none of the
+   given rtx codes is in {0, 16, 32, 48}.  */
+STATIC_ASSERT (((int)PLUS & ~48) != 0);
+STATIC_ASSERT (((int)IOR & ~48) != 0);
+STATIC_ASSERT (((int)AND & ~48) != 0);
+
+enum aa_gi_code
 {
-  unsigned HOST_WIDE_INT mask;
-  int i;
-  bool first;
-  unsigned HOST_WIDE_INT val;
-  bool subtargets;
-  rtx subtarget;
-  int one_match, zero_match, first_not_ffff_match;
-  int num_insns = 0;
+  AA_GI_NIL = -2,
+  AA_GI_SET = -1,
+
+  AA_GI_INS0 = 0,
+  AA_GI_INS1 = 16,
+  AA_GI_INS2 = 32,
+  AA_GI_INS3 = 48,
+
+  AA_GI_PLUS = PLUS,
+  AA_GI_IOR = IOR,
+  AA_GI_AND = AND
+};
+
+struct genimm_aa64 : genimm_base<aa_gi_code, 4>
+{
+  static const int max_simple = 2;
+
+  static rtx_code aa_gi_binop (aa_gi_code c)
+  {
+    return (c == AA_GI_PLUS || c == AA_GI_IOR || c == AA_GI_AND
+           ? (rtx_code)c : UNKNOWN);
+  }
+
+  genimm_aa64 (HOST_WIDE_INT c);
+
+  void set0 (HOST_WIDE_INT v);
+  void opN (aa_gi_code o, HOST_WIDE_INT v);
+  void insN (int b, unsigned HOST_WIDE_INT v);
+
+  /* The search algorithm that we use for aarch64 is non-recursive.
+     Thus we do not require the iteration provided by genimm_hash.
+     Produce an empty loop and go straight to exam_full.  */
+  bool exam_search (unsigned HOST_WIDE_INT, int) { return false; }
+
+  bool exam_simple (HOST_WIDE_INT val, machine_mode mode, int);
+  bool exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base);
+  void exam_full (unsigned HOST_WIDE_INT val);
+  void generate (rtx dest, machine_mode mode) const;
+};
 
-  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
+genimm_aa64::genimm_aa64 (HOST_WIDE_INT c)
+  : genimm_base (c)
+{
+#ifdef ENABLE_CHECKING
+  code[0] = code[1] = code[2] = code[3] = AA_GI_NIL;
+  op[0] = op[1] = op[2] = op[3] = 0;
+#endif
+}
+
+void
+genimm_aa64::set0 (HOST_WIDE_INT v)
+{
+  cost = 1;
+  code[0] = AA_GI_SET;
+  op[0] = v;
+}
+
+void
+genimm_aa64::opN (aa_gi_code c, HOST_WIDE_INT v)
+{
+  int n = cost++;
+  gcc_checking_assert (n > 0 && n < max_cost);
+  code[n] = c;
+  op[n] = v;
+}
+
+void
+genimm_aa64::insN (int b, unsigned HOST_WIDE_INT v)
+{
+  int n = cost++;
+  gcc_checking_assert (n > 0 && n < max_cost);
+  gcc_checking_assert ((b & ~48) == 0);
+  code[n] = (aa_gi_code)b;
+  op[n] = (v >> b) & 0xffff;
+}
+
+/* Look for simple constants that aren't worth hashing.  */
+
+bool
+genimm_aa64::exam_simple (HOST_WIDE_INT val, machine_mode mode, int)
+{
+  if (aarch64_move_imm (val, mode))
     {
-      if (generate)
-       emit_insn (gen_rtx_SET (dest, imm));
-      num_insns++;
-      return num_insns;
+      set0 (val);
+      return true;
     }
-
   if (mode == SImode)
     {
       /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
-      if (generate)
-       {
-         emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
-         emit_insn (gen_insv_immsi (dest, GEN_INT (16),
-                                    GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
-       }
-      num_insns += 2;
-      return num_insns;
+      set0 (val & 0xffff);
+      insN (16, val);
+      return true;
     }
+  return false;
+}
 
-  /* Remaining cases are all for DImode.  */
+/* A subroutine of genimm_aa64::exam_full.  If VAL can be created from BASE
+   via the addition of a constant, construct the recipe as appropriate and
+   return true.  Otherwise return false.  */
 
-  val = INTVAL (imm);
-  subtargets = optimize && can_create_pseudo_p ();
+bool
+genimm_aa64::exam_plus (unsigned HOST_WIDE_INT val, unsigned HOST_WIDE_INT base)
+{
+  HOST_WIDE_INT diff = val - base;
+  if (aarch64_uimm12_shift (diff < 0 ? -diff : diff))
+    {
+      set0 (base);
+      opN (AA_GI_PLUS, diff);
+      return true;
+    }
+  return false;
+}
 
-  one_match = 0;
-  zero_match = 0;
-  mask = 0xffff;
-  first_not_ffff_match = -1;
+/* Examine the DImode quantity VAL, and store a recipe for its creation.  */
 
-  for (i = 0; i < 64; i += 16, mask <<= 16)
+void
+genimm_aa64::exam_full (unsigned HOST_WIDE_INT val)
+{
+  unsigned HOST_WIDE_INT mask;
+  int one_match = 0;
+  int zero_match = 0;
+  int first_not_ffff_match = -1;
+
+  for (int i = 0; i < 64; i += 16)
     {
+      mask = HOST_WIDE_INT_UC (0xffff) << i;
       if ((val & mask) == mask)
        one_match++;
       else
@@ -1379,211 +1470,186 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
   if (one_match == 2)
     {
       /* Set one of the quarters and then insert back into result.  */
-      mask = 0xffffll << first_not_ffff_match;
-      if (generate)
-       {
-         emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
-         emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
-                                    GEN_INT ((val >> first_not_ffff_match)
-                                             & 0xffff)));
-       }
-      num_insns += 2;
-      return num_insns;
+      mask = HOST_WIDE_INT_UC (0xffff) << first_not_ffff_match;
+      set0 (val | mask);
+      insN (first_not_ffff_match, val);
+      return;
     }
 
   if (zero_match == 2)
     goto simple_sequence;
 
-  mask = 0x0ffff0000UL;
-  for (i = 16; i < 64; i += 16, mask <<= 16)
+  for (int i = 16; i < 64; i += 16)
     {
-      HOST_WIDE_INT comp = mask & ~(mask - 1);
+      unsigned HOST_WIDE_INT comp = HOST_WIDE_INT_1U << i;
+      mask = HOST_WIDE_INT_UC (0xffff) << i;
 
-      if (aarch64_uimm12_shift (val - (val & mask)))
-       {
-         if (generate)
-           {
-             subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-             emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
-             emit_insn (gen_adddi3 (dest, subtarget,
-                                    GEN_INT (val - (val & mask))));
-           }
-         num_insns += 2;
-         return num_insns;
-       }
-      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
-       {
-         if (generate)
-           {
-             subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-             emit_insn (gen_rtx_SET (subtarget,
-                                     GEN_INT ((val + comp) & mask)));
-             emit_insn (gen_adddi3 (dest, subtarget,
-                                    GEN_INT (val - ((val + comp) & mask))));
-           }
-         num_insns += 2;
-         return num_insns;
-       }
-      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
-       {
-         if (generate)
-           {
-             subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-             emit_insn (gen_rtx_SET (subtarget,
-                                     GEN_INT ((val - comp) | ~mask)));
-             emit_insn (gen_adddi3 (dest, subtarget,
-                                    GEN_INT (val - ((val - comp) | ~mask))));
-           }
-         num_insns += 2;
-         return num_insns;
-       }
-      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
-       {
-         if (generate)
-           {
-             subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-             emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
-             emit_insn (gen_adddi3 (dest, subtarget,
-                                    GEN_INT (val - (val | ~mask))));
-           }
-         num_insns += 2;
-         return num_insns;
-       }
+      if (exam_plus (val, val & mask))
+       return;
+      if (exam_plus (val, (val + comp) & mask))
+       return;
+      if (exam_plus (val, (val - comp) | ~mask))
+       return;
+      if (exam_plus (val, val | ~mask))
+       return;
     }
 
-  /* See if we can do it by arithmetically combining two
-     immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  /* See if we can do it by arithmetically combining two immediates.  */
+  for (int i = 0; i < AARCH64_NUM_BITMASKS; i++)
     {
-      int j;
-      mask = 0xffff;
+      unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i];
 
-      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
-         || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
-       {
-         if (generate)
-           {
-             subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
-             emit_insn (gen_rtx_SET (subtarget,
-                                     GEN_INT (aarch64_bitmasks[i])));
-             emit_insn (gen_adddi3 (dest, subtarget,
-                                    GEN_INT (val - aarch64_bitmasks[i])));
-           }
-         num_insns += 2;
-         return num_insns;
-       }
+      if (exam_plus (val, bmi))
+       return;
 
-      for (j = 0; j < 64; j += 16, mask <<= 16)
+      for (int j = 0; j < 64; j += 16)
        {
-         if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
+          mask = HOST_WIDE_INT_UC (0xffff) << j;
+         if ((bmi & ~mask) == (val & ~mask))
            {
-             if (generate)
-               {
-                 emit_insn (gen_rtx_SET (dest,
-                                         GEN_INT (aarch64_bitmasks[i])));
-                 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
-                                            GEN_INT ((val >> j) & 0xffff)));
-               }
-             num_insns += 2;
-             return num_insns;
+             set0 (bmi);
+             insN (j, val);
+             return;
            }
        }
     }
 
   /* See if we can do it by logically combining two immediates.  */
-  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
+  for (int i = 0; i < AARCH64_NUM_BITMASKS; i++)
     {
-      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
+      unsigned HOST_WIDE_INT bmi = aarch64_bitmasks[i];
+
+      if ((bmi & val) == bmi)
        {
-         int j;
+         for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
+           {
+             unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j];
 
-         for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-           if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
-             {
-               if (generate)
-                 {
-                   subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-                   emit_insn (gen_rtx_SET (subtarget,
-                                           GEN_INT (aarch64_bitmasks[i])));
-                   emit_insn (gen_iordi3 (dest, subtarget,
-                                          GEN_INT (aarch64_bitmasks[j])));
-                 }
-               num_insns += 2;
-               return num_insns;
-             }
+             if (val == (bmi | bmj))
+               {
+                 set0 (bmi);
+                 opN (AA_GI_IOR, bmj);
+                 return;
+               }
+           }
        }
-      else if ((val & aarch64_bitmasks[i]) == val)
+      else if ((val & bmi) == val)
        {
-         int j;
+         for (int j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
+           {
+             unsigned HOST_WIDE_INT bmj = aarch64_bitmasks[j];
 
-         for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
-           if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
-             {
-               if (generate)
-                 {
-                   subtarget = subtargets ? gen_reg_rtx (mode) : dest;
-                   emit_insn (gen_rtx_SET (subtarget,
-                                           GEN_INT (aarch64_bitmasks[j])));
-                   emit_insn (gen_anddi3 (dest, subtarget,
-                                          GEN_INT (aarch64_bitmasks[i])));
-                 }
-               num_insns += 2;
-               return num_insns;
-             }
+             if (val == (bmi & bmj))
+               {
+                 set0 (bmi);
+                 opN (AA_GI_AND, bmj);
+                 return;
+               }
+           }
        }
     }
 
   if (one_match > zero_match)
     {
       /* Set either first three quarters or all but the third.  */
-      mask = 0xffffll << (16 - first_not_ffff_match);
-      if (generate)
-       emit_insn (gen_rtx_SET (dest,
-                               GEN_INT (val | mask | 0xffffffff00000000ull)));
-      num_insns ++;
+      mask = HOST_WIDE_INT_UC (0xffff) << (16 - first_not_ffff_match);
+      set0 (val | mask | HOST_WIDE_INT_UC (0xffffffff00000000));
 
       /* Now insert other two quarters.         */
-      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
-          i < 64; i += 16, mask <<= 16)
+      for (int i = first_not_ffff_match + 16; i < 64; i += 16)
        {
+         mask = HOST_WIDE_INT_UC (0xffff) << i;
          if ((val & mask) != mask)
-           {
-             if (generate)
-               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-                                          GEN_INT ((val >> i) & 0xffff)));
-             num_insns ++;
-           }
+           insN (i, val);
        }
-      return num_insns;
+      return;
     }
 
  simple_sequence:
-  first = true;
-  mask = 0xffff;
-  for (i = 0; i < 64; i += 16, mask <<= 16)
+  cost = 0;
+  for (int i = 0; i < 64; i += 16)
     {
+      mask = HOST_WIDE_INT_UC (0xffff) << i;
       if ((val & mask) != 0)
        {
-         if (first)
-           {
-             if (generate)
-               emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
-             num_insns ++;
-             first = false;
-           }
+         if (cost == 0)
+           set0 (val & mask);
          else
-           {
-             if (generate)
-               emit_insn (gen_insv_immdi (dest, GEN_INT (i),
-                                          GEN_INT ((val >> i) & 0xffff)));
-             num_insns ++;
-           }
+           insN (i, val);
        }
     }
+}
+
+/* Follow the recipe to construct a value in MODE,
+   placing the result in DEST.  */
+
+void
+genimm_aa64::generate (rtx dest, machine_mode mode) const
+{
+  int n = cost;
 
-  return num_insns;
+  gcc_checking_assert (n >= 1 && n <= max_cost);
+  gcc_checking_assert (code[0] == AA_GI_SET);
+
+  /* If possible, put the original SET into its own pseudo, so that
+     it might be CSE'd.  We can't do this if we use INSV, and we only
+     ever use arithmetic with N == 2.  */
+  if (n == 2 && optimize && can_create_pseudo_p ())
+    {
+      rtx_code rc = aa_gi_binop (code[1]);
+      if (rc != UNKNOWN)
+       {
+         rtx sub = gen_reg_rtx (mode);
+         emit_insn (gen_rtx_SET (sub, GEN_INT (op[0])));
+         sub = gen_rtx_fmt_ee (rc, mode, sub, GEN_INT (op[1]));
+         emit_insn (gen_rtx_SET (dest, sub));
+         return;
+       }
+    }
+
+  emit_insn (gen_rtx_SET (dest, GEN_INT (op[0])));
+
+  for (int i = 1; i < n; ++i)
+    {
+      rtx x = GEN_INT (op[i]);
+      switch (code[i])
+       {
+       case AA_GI_PLUS:
+       case AA_GI_IOR:
+       case AA_GI_AND:
+         x = gen_rtx_fmt_ee (aa_gi_binop (code[i]), mode, dest, x);
+         x = gen_rtx_SET (dest, x);
+         break;
+       case AA_GI_INS0:
+       case AA_GI_INS1:
+       case AA_GI_INS2:
+       case AA_GI_INS3:
+         if (mode == SImode)
+           x = gen_insv_immsi (dest, GEN_INT ((int)code[i]), x);
+         else
+           x = gen_insv_immdi (dest, GEN_INT ((int)code[i]), x);
+         break;
+       default:
+         gcc_unreachable ();
+       }
+      emit_insn (x);
+    }
 }
 
+} // anon namespace
+
+/* Examine IMM in MODE and return the number of insns required to construct it.
+   If GENERATE is true, emit instructions to compute IMM into DEST.  */
+
+static inline int
+aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
+                                machine_mode mode)
+{
+  genimm_aa64 data = genimm_hash<genimm_aa64>::hash (INTVAL (imm), mode);
+  if (generate)
+    data.generate (dest, mode);
+  return data.cost;
+}
 
 void
 aarch64_expand_mov_immediate (rtx dest, rtx imm)
-- 
2.4.3
