Ping.  And adding the AArch64 maintainers.

On Mon, 2019-01-28 at 16:11 -0800, Steve Ellcey wrote:
> On Sat, 2019-01-26 at 00:00 +0100, Jakub Jelinek wrote:
> > 
> > > +  /* Verify that there is no overlap in what bits are set in the two masks.  */
> > > +  if ((m1 + m2 + 1) != 0)
> > > +    return false;
> > 
> > Wouldn't that be clearer to test
> >   if (m1 + m2 != HOST_WIDE_INT_1U)
> >     return false;
> > ?
> > You IMHO also should test
> >   if ((m1 & m2) != 0)
> >     return false;
> 
> I think you mean HOST_WIDE_INT_M1U, not HOST_WIDE_INT_1U.  That does
> seem clearer and I made that change as well as adding the second
> test.
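
For reference, here is a minimal standalone sketch of the two checks
discussed above (plain C, outside of GCC; the mask values are made-up
examples, not taken from the patch or the testsuite):

  #include <stdio.h>

  /* Return 1 if the two masks together cover every bit and do not
     overlap, mirroring the HOST_WIDE_INT_M1U and overlap checks.  */
  static int
  masks_ok (unsigned long long m1, unsigned long long m2)
  {
    if (m1 + m2 != ~0ULL)    /* Together they must cover all bits.  */
      return 0;
    if ((m1 & m2) != 0)      /* No bit may be set in both.  */
      return 0;
    return 1;
  }

  int
  main (void)
  {
    printf ("%d\n", masks_ok (0xffffffffffffff00ULL, 0xffULL));   /* 1 */
    printf ("%d\n", masks_ok (0xffffffffffffff00ULL, 0x1ffULL));  /* 0 */
    return 0;
  }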
> 
> > > 
> > > +  t = popcount_hwi (m2);
> > > +  t = (1 << t) - 1;
> > 
> > This should be (HOST_WIDE_INT_1U << t), otherwise if popcount of m2
> > is >= 32, there will be UB.
> 
> Fixed.
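
To illustrate Jakub's point about the shift (again plain C, not the
patch code itself): shifting the int literal 1 left by 32 or more bits
is undefined behaviour, so the 1 has to have HOST_WIDE_INT width before
the shift.  A minimal sketch:

  /* Build a mask of the T least significant bits, for T < 64.  */
  static unsigned long long
  low_bits_mask (unsigned int t)
  {
    /* return (1 << t) - 1;     undefined for t >= 32 (int-width shift) */
    return (1ULL << t) - 1;  /* well defined for t < 64 */
  }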
> 
> > As mentioned in rs6000.md, I believe you also need a similar
> > pattern where
> > the two ANDs are swapped, because they have the same priority.
> 
> I fixed the long lines in aarch64.md and I added a second pattern for
> the *aarch64_bfi<GPI:mode>4_noshift pattern, swapping the order of the
> IOR operands.  I did not swap the AND operands; I assume the compiler
> would ensure that the register always comes before the constant, or
> that it would check both orderings.
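
To make the intent concrete, here is a made-up example (in the spirit
of combine_bfxil.c, not copied from it) of the source shape the
no-shift patterns are aimed at; the idea is that this becomes a single
bit-field insert rather than separate and/and/orr instructions:

  /* Keep the high 56 bits of x and copy the low 8 bits of y into it.  */
  unsigned long long
  insert_low_byte (unsigned long long x, unsigned long long y)
  {
    return (x & 0xffffffffffffff00ULL) | (y & 0xffULL);
  }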
> 
> I tried adding a second version of *aarch64_bfi<GPI:mode>5_shift as
> well, but when I tested it, it never got used during a bootstrap build
> or a GCC test run, so I decided it was not needed.
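
For completeness, the shifted shape that *aarch64_bfi<GPI:mode>5_shift
is meant to match looks roughly like this made-up example, which copies
the low 8 bits of y into bits 8..15 of x:

  unsigned long long
  insert_byte_at_8 (unsigned long long x, unsigned long long y)
  {
    return (x & ~(0xffULL << 8)) | ((y << 8) & (0xffULL << 8));
  }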
> 
> Tested on aarch64 with a bootstrap build and a GCC testsuite run.
> OK to checkin?
> 
> 
> Steve Ellcey
> sell...@marvell.com
> 
> 
> 2019-01-28  Steve Ellcey  <sell...@marvell.com>
> 
>       PR rtl-optimization/87763
>       * config/aarch64/aarch64-protos.h
>       (aarch64_masks_and_shift_for_bfi_p): New prototype.
>       * config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
>       New function.
>       * config/aarch64/aarch64.md (*aarch64_bfi<GPI:mode>5_shift):
>       New instruction.
>       (*aarch64_bfi<GPI:mode>4_noshift): Ditto.
>       (*aarch64_bfi<GPI:mode>4_noshift_alt): Ditto.
> 
> 
> 2019-01-28  Steve Ellcey  <sell...@marvell.com>
> 
>       PR rtl-optimization/87763
>       * gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks
>       to bfi.
> 
> 
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b035e35f33b..b6c0d0a8eb6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
 void aarch64_declare_function_name (FILE *, const char*, tree);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
+bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
+					unsigned HOST_WIDE_INT,
+					unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7c453cdad0..9a3080b5db8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9330,6 +9330,41 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
 	     & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
 }
 
+/* Return true if the masks and a shift amount from an RTX of the form
+   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
+   a BFI instruction of mode MODE.  See *aarch64_bfi patterns.  */
+
+bool
+aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
+				   unsigned HOST_WIDE_INT mask1,
+				   unsigned HOST_WIDE_INT shft_amnt,
+				   unsigned HOST_WIDE_INT mask2)
+{
+  unsigned HOST_WIDE_INT t;
+
+  /* Verify that the two masks are disjoint and together cover all bits.  */
+  if ((mask1 + mask2) != HOST_WIDE_INT_M1U)
+    return false;
+  if ((mask1 & mask2) != 0)
+    return false;
+
+  /* Verify that the shift amount is less than the mode size.  */
+  if (shft_amnt >= GET_MODE_BITSIZE (mode))
+    return false;
+
+  /* Verify that mask2 is contiguous and starts at the shift amount,
+     i.e. that the inserted bits are the least significant bits of the
+     shifted operand: build such a mask 't' from the popcount of mask2
+     and the shift amount and compare it to the actual mask2.  */
+  t = popcount_hwi (mask2);
+  t = (HOST_WIDE_INT_1U << t) - 1;
+  t = t << shft_amnt;
+  if (t != mask2)
+    return false;
+
+  return true;
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
    is true if the total cost of the operation has now been calculated.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b7f6fe0f135..6b5339092ba 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5476,6 +5476,56 @@
   [(set_attr "type" "bfm")]
 )
 
+;; Match a bfi instruction where the shift of OP3 means that we are
+;; actually copying the least significant bits of OP3 into OP0 by way
+;; of the AND masks and the IOR instruction.  A similar instruction
+;; with the two parts of the IOR swapped around was never triggered
+;; in a bootstrap build and test of GCC, so it was not included.
+
+(define_insn "*aarch64_bfi<GPI:mode>5_shift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (ashift:GPI
+                           (match_operand:GPI 3 "register_operand" "r")
+                           (match_operand:GPI 4 "aarch64_simd_shift_imm_<mode>" "n"))
+                          (match_operand:GPI 5 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]),
+				      UINTVAL (operands[4]),
				      UINTVAL (operands[5]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, %4, %P5"
+  [(set_attr "type" "bfm")]
+)
+
+;; Like the above instruction but with no shifting; we are just copying
+;; the least significant bits of OP3 to OP0.  In this case we do need
+;; two versions of the instruction to handle different checks on the
+;; constant values.
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "0")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 3 "register_operand" "r")
+                          (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[2]), 0,
+				      UINTVAL (operands[4]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>3, 0, %P4"
+  [(set_attr "type" "bfm")]
+)
+
+(define_insn "*aarch64_bfi<GPI:mode>4_noshift_alt"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (ior:GPI (and:GPI (match_operand:GPI 1 "register_operand" "r")
+                          (match_operand:GPI 2 "const_int_operand" "n"))
+                 (and:GPI (match_operand:GPI 3 "register_operand" "0")
+                          (match_operand:GPI 4 "const_int_operand" "n"))))]
+  "aarch64_masks_and_shift_for_bfi_p (<MODE>mode, UINTVAL (operands[4]), 0,
+				      UINTVAL (operands[2]))"
+  "bfi\t%<GPI:w>0, %<GPI:w>1, 0, %P2"
+  [(set_attr "type" "bfm")]
+)
+
 (define_insn "*extr_insv_lower_reg<mode>"
   [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
 			  (match_operand 1 "const_int_operand" "n")
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
index 109f989a2f0..c3b5fc58800 100644
--- a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
@@ -114,4 +114,5 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-assembler-times "bfxil\\t" 18 } } */
+/* { dg-final { scan-assembler-times "bfxil\\t" 3 } } */
+/* { dg-final { scan-assembler-times "bfi\\t" 15 } } */
