MVE asrl and lsll instructions have two variants:
- immediate shift amount in the [1..32] range
- shift amount in a register, where negative values reverse the
  direction of the shift

However, RTL assumes that the shift amount is interpreted as unsigned, so
we want to make sure undesired simplifications do not take place.
For instance if simplify_rtx optimizes
(set (reg:SI 1) (const_int -5))
(set (reg:DI 2) (ashift:DI (reg:DI 3) (reg:SI 1)))
into:
(set (reg:DI 2) (ashift:DI (reg:DI 3) (const_int -5)))
we do not want this to be interpreted as undefined behavior.

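We handle this with a define_expand (mve_asrl, mve_lsll) that dispatches
on the shift amount:
- immediates are handled by a define_insn_and_split pattern
  (mve_asrl_imm, mve_lsll_imm) which directly maps immediates in
  [1..32] to the shift instruction and splits other cases as needed;
  negative immediates are first turned into the reverse shift by the
  negated amount.
- non-immediates are handled by a separate pattern (mve_asrl_internal,
  mve_lsll_internal) which describes the reversed shift for negative
  amounts.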

gcc/ChangeLog:

        PR target/122216
        * config/arm/arm.md (ashldi3, ashrdi3): Force shift amount into
        QImode.
        * config/arm/constraints.md: Fix comment, Pg is valid in Thumb-2
        state only.
        * config/arm/mve.md (mve_asrl): Handle various shift amount ranges.
        (mve_asrl_imm, mve_asrl_internal): New patterns.
        (mve_lsll): Handle various shift amount ranges.
        (mve_lsll_imm, mve_lsll_internal): New patterns.

gcc/testsuite/ChangeLog:

        PR target/122216
        * gcc.target/arm/mve/intrinsics/asrl-various-ranges.c: New test.
        * gcc.target/arm/mve/intrinsics/lsll-various-ranges.c: New test.
---
 gcc/config/arm/arm.md                         |   2 +
 gcc/config/arm/constraints.md                 |   5 +-
 gcc/config/arm/mve.md                         | 186 +++++++++++++++++-
 .../arm/mve/intrinsics/asrl-various-ranges.c  | 161 +++++++++++++++
 .../arm/mve/intrinsics/lsll-various-ranges.c  | 160 +++++++++++++++
 5 files changed, 505 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/asrl-various-ranges.c
 create mode 100644 gcc/testsuite/gcc.target/arm/mve/intrinsics/lsll-various-ranges.c

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1575d126733..b5d92aebf4d 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -4588,6 +4588,7 @@ (define_expand "ashldi3"
       if (arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2]))
          && (REG_P (operands[2]) || INTVAL(operands[2]) != 32))
         {
+         operands[2] = convert_modes (QImode, SImode, operands[2], 0);
          emit_insn (gen_mve_lsll (operands[0], operands[1], operands[2]));
          DONE;
        }
@@ -4624,6 +4625,7 @@ (define_expand "ashrdi3"
   if (TARGET_HAVE_MVE && !BYTES_BIG_ENDIAN
       && arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2])))
     {
+      operands[2] = convert_modes (QImode, SImode, operands[2], 0);
       emit_insn (gen_mve_asrl (operands[0], operands[1], operands[2]));
       DONE;
     }
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 24743a82356..86a9e97f514 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -35,9 +35,8 @@
 ;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, DN, Dm, Dl, DL, Do, Dv, Dy, Di,
 ;;                      Dj, Ds, Dt, Dp, Dz, Tu, Te
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
-;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Rd, Rf, Rb, Ra,
-;;                  Rg, Ri
-;; in all states: Pg
+;; in Thumb-2 state: Ha, Pg, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz, Ra, Rb,
+;;                  Rd, Rf, Rg, Ri
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Uh, Ut, Uv, Un, Um, Us, Uo, Up, Uf, Ux, Ul, Uz
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index e20ec716209..fd996b2dab0 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -4715,19 +4715,193 @@ (define_insn "dlstp<dlstp_elemsize>_insn"
   "dlstp.<dlstp_elemsize>\t%|lr, %0"
   [(set_attr "type" "mve_misc")])
 
+
+;;
 ;; Scalar shifts
-(define_insn "mve_asrl"
+;;
+;; Immediate shift amounts have to be in the [1..32] range.
+;;
+;; Shift amounts stored in a register can be negative, in which case
+;; the direction of the shift is reversed (asrl, lsll only).
+;; Since RTL expects shift amounts to be unsigned, make sure the
+;; negative case is handled explicitly, in case simplify_rtx optimizes:
+;; (set (reg:SI 1) (const_int -5))
+;; (set (reg:DI 2) (ashift:DI (reg:DI 3) (reg:SI 1)))
+;; into:
+;; (set (reg:DI 2) (ashift:DI (reg:DI 3) (const_int -5)))
+
+;; General pattern for asrl
+(define_expand "mve_asrl"
+  [(set (match_operand:DI 0 "arm_general_register_operand" "")
+       (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "")
+                    (match_operand:QI 2 "reg_or_int_operand" "")))]
+  "TARGET_HAVE_MVE"
+{
+  rtx amount = operands[2];
+  if (CONST_INT_P (amount))
+    {
+      HOST_WIDE_INT ival = INTVAL (amount);
+
+      if (ival >= 0)
+       /* Right shift.  */
+       emit_insn (gen_mve_asrl_imm (operands[0], operands[1], amount));
+      else
+       /* Left shift.  */
+       emit_insn (gen_mve_lsll_imm (operands[0], operands[1],
+                                    GEN_INT (-ival)));
+      DONE;
+    }
+
+  emit_insn (gen_mve_asrl_internal (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; immediate shift amount
+;; we have to split the insn if the amount is not in the [1..32] range
+(define_insn_and_split "mve_asrl_imm"
+  [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r")
+         (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0,r")
+                      (match_operand:QI 2 "immediate_operand" "Pg,I")))]
+  "TARGET_HAVE_MVE"
+  "asrl%?\\t%Q0, %R1, %2"
+  "&& !satisfies_constraint_Pg (operands[2])"
+  [(clobber (const_int 0))]
+  "
+  rtx amount = operands[2];
+  HOST_WIDE_INT ival = INTVAL (amount);
+
+  /* shift amount in [1..32] is already handled by the Pg constraint.  */
+
+  /* Shift by 0, it is just a move.  */
+  if (ival == 0)
+    {
+      emit_insn (gen_movdi (operands[0], operands[1]));
+      DONE;
+    }
+
+  /* ival < 0 should have already been handled by mve_asrl. */
+  gcc_assert (ival > 32);
+
+  /* Shift amount above immediate range (ival > 32).
+     out_hi gets the sign bit
+     out_lo gets in_hi >> (ival - 32) or >> 31 if ival >= 64.
+     If ival >= 64, the result is either 0 or -1, depending on the
+     input sign.  */
+  rtx in_hi = gen_highpart (SImode, operands[1]);
+  rtx out_lo = gen_lowpart (SImode, operands[0]);
+  rtx out_hi = gen_highpart (SImode, operands[0]);
+
+  emit_insn (gen_rtx_SET (out_lo,
+                         gen_rtx_fmt_ee (ASHIFTRT,
+                                         SImode,
+                                         in_hi,
+                                         GEN_INT (MIN (ival - 32,
+                                                       31)))));
+  /* Copy sign bit, which is OK even if out_lo == in_hi.  */
+  emit_insn (gen_rtx_SET (out_hi,
+                         gen_rtx_fmt_ee (ASHIFTRT,
+                                         SImode,
+                                         in_hi,
+                                         GEN_INT (31))));
+       DONE;
+  "
+  [(set_attr "predicable" "yes,yes")
+   (set_attr "length" "4,8")])
+
+(define_insn "mve_asrl_internal"
   [(set (match_operand:DI 0 "arm_general_register_operand" "=r")
-       (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0")
-                    (match_operand:SI 2 "arm_reg_or_long_shift_imm" "rPg")))]
+       (if_then_else:DI
+         (ge:QI (match_operand:QI 2 "arm_general_register_operand" "r")
+                (const_int 0))
+         (ashiftrt:DI (match_operand:DI 1 "arm_general_register_operand" "0")
+                      (match_dup 2))
+         (ashift:DI (match_dup 1) (neg:QI (match_dup 2)))))]
   "TARGET_HAVE_MVE"
   "asrl%?\\t%Q0, %R1, %2"
   [(set_attr "predicable" "yes")])
 
-(define_insn "mve_lsll"
+;; General pattern for lsll
+(define_expand "mve_lsll"
+  [(set (match_operand:DI 0 "arm_general_register_operand" "")
+       (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "")
+                  (match_operand:QI 2 "reg_or_int_operand" "")))]
+  "TARGET_HAVE_MVE"
+{
+  rtx amount = operands[2];
+  if (CONST_INT_P (amount))
+    {
+      HOST_WIDE_INT ival = INTVAL (amount);
+
+      if (ival >= 0)
+       /* Left shift.  */
+       emit_insn (gen_mve_lsll_imm (operands[0], operands[1], amount));
+      else
+       /* Right shift.  */
+       emit_insn (gen_lshrdi3 (operands[0], operands[1],
+                                GEN_INT (-ival)));
+      DONE;
+    }
+
+  emit_insn (gen_mve_lsll_internal (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+;; immediate shift amount
+;; we have to split the insn if the amount is not in the [1..32] range
+(define_insn_and_split "mve_lsll_imm"
+  [(set (match_operand:DI 0 "arm_general_register_operand" "=r,r")
+         (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0,r")
+                    (match_operand:QI 2 "immediate_operand" "Pg,I")))]
+  "TARGET_HAVE_MVE"
+  "lsll%?\\t%Q0, %R1, %2"
+  "&& !satisfies_constraint_Pg (operands[2])"
+  [(clobber (const_int 0))]
+  "
+  rtx amount = operands[2];
+  HOST_WIDE_INT ival = INTVAL (amount);
+
+  /* shift amount in [1..32] is already handled by the Pg constraint.  */
+
+  /* Shift by 0, it is just a move.  */
+  if (ival == 0)
+    {
+      emit_insn (gen_movdi (operands[0], operands[1]));
+      DONE;
+    }
+
+  /* Shift amount larger than input, result is 0.  */
+  if (ival >= 64)
+    {
+      emit_insn (gen_movdi (operands[0], const0_rtx));
+      DONE;
+    }
+
+  /* ival < 0 should have already been handled by mve_lsll or mve_asrl.  */
+  gcc_assert (ival > 32);
+
+  /* Shift amount above immediate range: 32 < ival < 64.  */
+  rtx in_lo = gen_lowpart (SImode, operands[1]);
+  rtx out_lo = gen_lowpart (SImode, operands[0]);
+  rtx out_hi = gen_highpart (SImode, operands[0]);
+  emit_insn (gen_rtx_SET (out_hi,
+                         gen_rtx_fmt_ee (ASHIFT,
+                                         SImode,
+                                         in_lo,
+                                         GEN_INT (ival - 32))));
+  emit_insn (gen_rtx_SET (out_lo, const0_rtx));
+  DONE;
+  "
+  [(set_attr "predicable" "yes,yes")
+   (set_attr "length" "4,8")])
+
+(define_insn "mve_lsll_internal"
   [(set (match_operand:DI 0 "arm_general_register_operand" "=r")
-       (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0")
-                  (match_operand:SI 2 "arm_reg_or_long_shift_imm" "rPg")))]
+        (if_then_else:DI
+         (ge:QI (match_operand:QI 2 "arm_general_register_operand" "r")
+                (const_int 0))
+         (ashift:DI (match_operand:DI 1 "arm_general_register_operand" "0")
+                    (match_dup 2))
+         (lshiftrt:DI (match_dup 1) (neg:QI (match_dup 2)))))]
   "TARGET_HAVE_MVE"
   "lsll%?\\t%Q0, %R1, %2"
   [(set_attr "predicable" "yes")])
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/asrl-various-ranges.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/asrl-various-ranges.c
new file mode 100644
index 00000000000..4e2a301863d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/asrl-various-ranges.c
@@ -0,0 +1,161 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Check that calling asrl with an out of range shift amount is not interpreted
+   as undefined behavior, and that we actually use the asrl instruction (except
+   if a negative shift amount can be handled by lsll).  Check code generation
+   for various special cases:
+   1 <= amount <= 32
+   -32 <= amount <= -1
+   32 < amount < 64
+   -64 < amount < -32
+   amount >= 64
+   amount <= -64
+   amount == 0
+   amount unknown at compile time. */
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  /* Positive shift amount in [1..32] range, use the immediate:
+
+   asrl r0, r1, #3  */
+/*
+**foo_3:
+**     ...
+**     asrl    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?:     @.*|)
+**     ...
+*/
+int64_t
+foo_3 (int64_t value)
+{
+  return asrl (value, 3);
+}
+
+  /* Negative shift amount in [-32..-1] range, reverse shift (lsll) with the
+     opposite shift amount as immediate:
+
+     lsll r0, r1, #3  */
+/*
+**foo_m3:
+**     ...
+**     lsll    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?:     @.*|)
+**     ...
+*/
+int64_t
+foo_m3 (int64_t value)
+{
+  return asrl (value, -3);
+}
+
+  /* Out of [1..32] range positive shift amount, but < 64.
+     lo_out = hi_in >> (amount - 32)
+     hi_out = hi_in >> 31 (to copy the sign bit)
+
+     asrs r0, r1, #1
+     asrs r1, r1, #31   */
+/*
+**foo_33:
+**     ...
+**     asrs    (?:ip|fp|r[0-9]+), (ip|fp|r[0-9]+), #1(?:       @.*|)
+**     asrs    (?:ip|fp|r[0-9]+), \1, #31(?:   @.*|)
+**     ...
+*/
+int64_t
+foo_33 (int64_t value)
+{
+  return asrl (value, 33);
+}
+
+  /* Out of [-32..-1] range negative shift amount, but > -64. Reverse shift
+     (lsll equivalent) in [33..63] range:
+     hi_out = lo_in << (-amount - 32)
+     lo_out = 0
+
+     lsls r1, r0, #1
+     movs r0, #0  */
+/*
+**foo_m33:
+**     ...
+**     lsls    (?:ip|fp|r[0-9]+), (ip|fp|r[0-9]+), #1(?:       @.*|)
+**     movs    \1, #0(?:       @.*|)
+**     ...
+*/
+int64_t
+foo_m33 (int64_t value)
+{
+  return asrl (value, -33);
+}
+
+  /* Out of range positive shift amount (>= 64)
+     lo_out = hi_in >> 31 (copy sign bit)
+     hi_out = hi_in >> 31
+
+     asrs r0, r1, #31
+     mov r1, r0  */
+/*
+**foo_65:
+**     ...
+**     asrs    (ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #31(?:      @.*|)
+**     mov     (?:ip|fp|r[0-9]+), \1(?:        @.*|)
+**     ...
+*/
+int64_t
+foo_65 (int64_t value)
+{
+  return asrl (value, 65);
+}
+
+  /* Out of range negative shift amount (<= -64), result is 0.
+
+   movs r0, #0
+   movs r1, #0  */
+/*
+**foo_m65:
+**     ...
+**     movs    (ip|fp|r[0-9]+), #0(?:  @.*|)
+**     movs    (ip|fp|r[0-9]+), #0(?:  @.*|)
+**     ...
+*/
+int64_t
+foo_m65 (int64_t value)
+{
+  return asrl (value, -65);
+}
+
+  /* shift amount == 0, use a mov, which is optimized out.  */
+/*
+**foo_0:
+**     bx      lr
+**     ...
+*/
+int64_t
+foo_0 (int64_t value)
+{
+  return asrl (value, 0);
+}
+
+  /* Unknown shift amount, use the register variant.
+
+     asrl r0, r1, r2  */
+/*
+**foo_var:
+**     ...
+**     asrl    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+)(?:      @.*|)
+**     ...
+*/
+int64_t
+foo_var (int64_t value, int32_t amount)
+{
+  return asrl (value, amount);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/lsll-various-ranges.c b/gcc/testsuite/gcc.target/arm/mve/intrinsics/lsll-various-ranges.c
new file mode 100644
index 00000000000..1e2f558af88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/lsll-various-ranges.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Check that calling lsll with an out of range shift amount is not interpreted
+   as undefined behavior, and that we actually use the lsll instruction (except
+   if a negative shift amount can be handled by lsrl).  Check code generation
+   for various special cases:
+   1 <= amount <= 32
+   -32 <= amount <= -1
+   32 < amount < 64
+   -64 < amount < -32
+   amount >= 64
+   amount <= -64
+   amount == 0
+   amount unknown at compile time. */
+#include "arm_mve.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  /* Positive shift amount in [1..32] range, use the immediate:
+
+     lsll r0,r1,#3  */
+/*
+**foo_3:
+**     ...
+**     lsll    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?:     @.*|)
+**     ...
+*/
+uint64_t
+foo_3 (uint64_t value)
+{
+  return lsll (value, 3);
+}
+
+  /* Negative shift amount in [-32..-1] range, reverse shift (lsrl) with the
+     opposite shift amount as immediate:
+
+     lsrl r0, r1, #3  */
+/*
+**foo_m3:
+**     ...
+**     lsrl    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #3(?:     @.*|)
+**     ...
+*/
+uint64_t
+foo_m3 (uint64_t value)
+{
+  return lsll (value, -3);
+}
+
+  /* Out of [1..32] range positive shift amount, but < 64.
+     high_out = low_in << (amount - 32) (using lsls, not lsll)
+     low_out = 0
+
+     lsls r1,r0,#1
+     movs r0, #0  */
+/*
+**foo_33:
+**     ...
+**     lsls    (?:ip|fp|r[0-9]+), (ip|fp|r[0-9]+), #1(?:       @.*|)
+**     movs    \1, #0(?:       @.*|)
+**     ...
+*/
+uint64_t
+foo_33 (uint64_t value)
+{
+  return lsll (value, 33);
+}
+
+  /* Out of [-32..-1] range negative shift amount, but > -64.  Reverse shift
+     (lsrl equivalent) in [33..63] range:
+     lo_out = hi_in >> (-amount - 32)
+     hi_out = 0
+
+     lsrs r0, r1, #1
+     movs r1, #0  */
+/*
+**foo_m33:
+**     ...
+**     lsrs    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), #1(?:     @.*|)
+**     movs    (?:ip|fp|r[0-9]+), #0(?:        @.*|)
+**     ...
+*/
+uint64_t
+foo_m33 (uint64_t value)
+{
+  return lsll (value, -33);
+}
+
+  /* Out of range positive shift amount (>= 64), result is 0.
+
+     movs r0, #0
+     movs r1, #0  */
+/*
+**foo_65:
+**     ...
+**     movs    (ip|fp|r[0-9]+), #0(?:  @.*|)
+**     movs    (ip|fp|r[0-9]+), #0(?:  @.*|)
+**     ...
+*/
+uint64_t
+foo_65 (uint64_t value)
+{
+  return lsll (value, 65);
+}
+
+  /* Out of range negative shift amount (<= -64), result is 0, because the
+     reverse shift is a logical right shift.
+
+     movs r0, #0
+     movs r1, #0  */
+/*
+**foo_m65:
+**     ...
+**     movs    (ip|fp|r[0-9]+), #0(?:  @.*|)
+**     movs    (ip|fp|r[0-9]+), #0(?:  @.*|)
+**     ...
+*/
+uint64_t
+foo_m65 (uint64_t value)
+{
+  return lsll (value, -65);
+}
+
+  /* shift amount == 0, use a mov, which is optimized out.  */
+/*
+**foo_0:
+**     bx      lr
+**     ...
+*/
+uint64_t
+foo_0 (uint64_t value)
+{
+  return lsll (value, 0);
+}
+
+  /* Unknown shift amount, use the register variant.
+
+   lsll r0, r1, r2  */
+/*
+**foo_var:
+**     ...
+**     lsll    (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+), (?:ip|fp|r[0-9]+)(?:      @.*|)
+**     ...
+*/
+uint64_t
+foo_var (uint64_t value, int32_t amount)
+{
+  return lsll (value, amount);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+/* { dg-final { scan-assembler-not "__ARM_undef" } } */
-- 
2.34.1

Reply via email to