On 01/12/2025 15:53, Christophe Lyon wrote:
Changes v2->v3:
- use satisfies_constraint_Ph in split condition
- use %R0 instead of %R1 as first input parameter

I think the same applies to the register shift variants, particularly mve_lsrl.

- improve test to cover more cases (shifting by #34 instead of #33
   avoids PR122871)

The second alternative for operand 1 needs a new constraint which does
not overlap with Pg, except that it can handle 32 to generate an
optimal mov.

This patch introduces a new Ph constraint which has the [32..255]
range.

This fixes a lot of regressions when running the testsuite for an MVE
target such as -march=armv8.1-m.main+mve.fp+fp.dp -mfloat-abi=hard.

gcc/ChangeLog:

        PR target/122858
        * config/arm/constraints.md (Ph): New constraint.
        * config/arm/mve.md (mve_asrl_imm, mve_lsll_imm): Fix constraints
        of operand 1 and handle 32 as special shift amount.

gcc/testsuite/ChangeLog:

        PR target/122858
        * gcc.target/arm/mve/pr122858.c: New test.
---
  gcc/config/arm/constraints.md               |  9 ++-
  gcc/config/arm/mve.md                       | 65 ++++++++++-------
  gcc/testsuite/gcc.target/arm/mve/pr122858.c | 79 +++++++++++++++++++++
  3 files changed, 125 insertions(+), 28 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/arm/mve/pr122858.c

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 86a9e97f514..e3809d31ba9 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -35,8 +35,8 @@
  ;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, DN, Dm, Dl, DL, Do, Dv, Dy, Di,
  ;;                     Dj, Ds, Dt, Dp, Dz, Tu, Te
  ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
-;; in Thumb-2 state: Ha, Pg, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Ra, Rb,
-;;                  Rd, Rf, Rg, Ri
+;; in Thumb-2 state: Ha, Pg, Ph, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Ra,
+;;                  Rb, Rd, Rf, Rg, Ri
;; The following memory constraints have been used:
  ;; in ARM/Thumb-2 state: Uh, Ut, Uv, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
@@ -237,6 +237,11 @@ (define_constraint "Pg"
    (and (match_code "const_int")
         (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 32")))
+(define_constraint "Ph"
+  "@internal In Thumb-2 state a constant in range 32 to 255"
+  (and (match_code "const_int")
+       (match_test "TARGET_THUMB2 && ival >= 32 && ival <= 255")))
+
  (define_constraint "Ps"
    "@internal In Thumb-2 state a constant in the range -255 to +255"
    (and (match_code "const_int")
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index b29c5f1bcd5..bec81461aa8 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -4762,10 +4762,10 @@ (define_expand "mve_asrl"
  (define_insn_and_split "mve_asrl_imm"
    [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r")
          (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0,r")
-                      (match_operand:QI 2 "immediate_operand" "Pg,I")))]
+                      (match_operand:QI 2 "immediate_operand" "Pg,Ph")))]
    "TARGET_HAVE_MVE"
-  "asrl%?\\t%Q0, %R1, %2"
-  "&& !satisfies_constraint_Pg (operands[2])"
+  "asrl%?\\t%Q0, %R0, %2"
+  "&& satisfies_constraint_Ph (operands[2])"
    [(clobber (const_int 0))]
    "
    rtx amount = operands[2];
@@ -4781,30 +4781,36 @@ (define_insn_and_split "mve_asrl_imm"
      }
/* ival < 0 should have already been handled by mve_asrl. */
-  gcc_assert (ival > 32);
+  gcc_assert (ival >= 32);
- /* Shift amount above immediate range (ival > 32).
-     out_hi gets the sign bit
-     out_lo gets in_hi << (ival - 32) or << 31 if ival >= 64.
-     If ival >= 64, the result is either 0 or -1, depending on the
-     input sign.  */
    rtx in_hi = gen_highpart (SImode, operands[1]);
    rtx out_lo = gen_lowpart (SImode, operands[0]);
    rtx out_hi = gen_highpart (SImode, operands[0]);
- emit_insn (gen_rtx_SET (out_lo,
-                         gen_rtx_fmt_ee (ASHIFTRT,
-                                         SImode,
-                                         in_hi,
-                                         GEN_INT (MIN (ival - 32,
-                                                       31)))));
+  if (ival == 32)
+    /* out_hi gets the sign bit
+       out_lo gets in_hi.  */
+    emit_insn (gen_movsi (out_lo, in_hi));
+  else
+    /* Shift amount above immediate range (ival > 32).
+       out_hi gets the sign bit
+       out_lo gets in_hi << (ival - 32) or << 31 if ival >= 64.
+       If ival >= 64, the result is either 0 or -1, depending on the
+       input sign.  */
+    emit_insn (gen_rtx_SET (out_lo,
+                           gen_rtx_fmt_ee (ASHIFTRT,
+                                           SImode,
+                                           in_hi,
+                                           GEN_INT (MIN (ival - 32,
+                                                         31)))));
+
    /* Copy sign bit, which is OK even if out_lo == in_hi.  */
    emit_insn (gen_rtx_SET (out_hi,
                          gen_rtx_fmt_ee (ASHIFTRT,
                                          SImode,
                                          in_hi,
                                          GEN_INT (31))));
-       DONE;
+  DONE;
    "
    [(set_attr "predicable" "yes,yes")
     (set_attr "length" "4,8")])
@@ -4852,10 +4858,10 @@ (define_expand "mve_lsll"
  (define_insn_and_split "mve_lsll_imm"
    [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r")
          (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0,r")
-                    (match_operand:QI 2 "immediate_operand" "Pg,I")))]
+                    (match_operand:QI 2 "immediate_operand" "Pg,Ph")))]
    "TARGET_HAVE_MVE"
-  "lsll%?\\t%Q0, %R1, %2"
-  "&& !satisfies_constraint_Pg (operands[2])"
+  "lsll%?\\t%Q0, %R0, %2"
+  "&& satisfies_constraint_Ph (operands[2])"
    [(clobber (const_int 0))]
    "
    rtx amount = operands[2];
@@ -4878,17 +4884,24 @@ (define_insn_and_split "mve_lsll_imm"
      }
/* ival < 0 should have already been handled by mve_asrl. */
-  gcc_assert (ival > 32);
+  gcc_assert (ival >= 32);
- /* Shift amount above immediate range: 32 < ival < 64. */
    rtx in_lo = gen_lowpart (SImode, operands[1]);
    rtx out_lo = gen_lowpart (SImode, operands[0]);
    rtx out_hi = gen_highpart (SImode, operands[0]);
-  emit_insn (gen_rtx_SET (out_hi,
-                         gen_rtx_fmt_ee (ASHIFT,
-                                         SImode,
-                                         in_lo,
-                                         GEN_INT (ival - 32))));
+
+  if (ival == 32)
+    /* Shift by 32 is just a move.  */
+    emit_insn (gen_movsi (out_hi, in_lo));
+  else
+    /* Shift amount above immediate range: 32 < ival < 64.  */
+    emit_insn (gen_rtx_SET (out_hi,
+                           gen_rtx_fmt_ee (ASHIFT,
+                                           SImode,
+                                           in_lo,
+                                           GEN_INT (ival - 32))));
+
+  /* Clear low 32 bits.  */
    emit_insn (gen_rtx_SET (out_lo, const0_rtx));
    DONE;
    "
diff --git a/gcc/testsuite/gcc.target/arm/mve/pr122858.c 
b/gcc/testsuite/gcc.target/arm/mve/pr122858.c
new file mode 100644
index 00000000000..4511a50ebd3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/pr122858.c
@@ -0,0 +1,79 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_mve_hw } */
+/* { dg-options "-O2" } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+extern void abort (void);
+
+/*
+** ashrl_fn:
+**...
+**     asrl    r[0-9]+, r[0-9]+, #31
+**...
+*/
+__attribute__ ((noipa))
+long long ashrl_fn (long long a)
+{
+  long long c;
+
+  c = a >> 31;
+  c += a;
+  return c;
+}
+
+/*
+** ashll_fn:
+**...
+**     add     (r[0-9]+), \1, r[0-9]+, lsl #2
+**...
+*/
+__attribute__ ((noipa))
+long long ashll_fn (long long a)
+{
+  long long c;
+
+  /* Use 34, since 33 causes PR122871.  */
+  c = a << 34;
+  c += a;
+  return c;
+}
+
+/*
+** ashll_fn2:
+**...
+**     lsll    r[0-9]+, r[0-9]+, #7
+**...
+*/
+__attribute__ ((noipa))
+long long ashll_fn2 (long long a /* unused */, long long x)
+{
+  return x << 7;
+}

The key thing about this test is that r2,r3 have to be copied into r0,r1 either before or after the shift. Just checking for the lsll is not sufficient (and that's what was wrong before this fix).

I think gcc will always do the moves first, but technically either order is ok.

+
+/*
+** ashll_fn3:
+**...
+**     lsls    r[0-9]+, (r[0-9]+), #2
+**     movs    \1, #0
+**...
+*/
+__attribute__ ((noipa))
+long long ashll_fn3 (long long x)
+{
+  return x << 34;
+}
+
+int main(void)
+{
+  long long var1 = 1;
+  long long var2 = ashll_fn (var1);
+  if (var2 != 0x400000001)
+    abort ();
+
+  var2 = ashrl_fn (var2);
+  if (var2 != 0x400000009)
+    abort ();
+
+  return 0;
+}

Reply via email to