With the attachment this time :-)

Regards
VP

On Tue, Aug 20, 2013 at 04:01:59PM +0100, Vidya Praveen wrote:
> Hello,
> 
> This patch supports SISD shift instructions SHL/USHR/SSHR/SSHL/USHL for
> SImode and DImode. This patch also refactors the integer shifts pattern
> "<optab><mode>3_insn". Pattern for rotate is moved out as ror<mode>3_insn.
> 
> Shift patterns (aarch64_{lshr|ashl|ashr}_sisd_or_int_{si|di}3) support
> both SIMD registers and general purpose registers with the shift quantity
> either as variable or literal. Since there are no SISD instructions for
> right shifts by a register amount, the instructions SSHL and USHL are used
> with the shift operand negated using NEG in order to reverse the direction.
> This is done by insisting on splitting (after reload) into a neg and a
> UNSPEC_SISD_USHL, UNSPEC_SISD_SSHL, UNSPEC_USHL_2S or UNSPEC_SSHL_2S
> pattern. Since there are no SISD variants of the shift instructions
> available for SImode, the SIMD variants of the corresponding instructions
> are used with 2S size, taking only one lane into consideration and
> ignoring the other.
> 
> This patch also introduces a predicate aarch64_simd_register to help in
> splitting patterns. Tests for both newly introduced instructions as well
> as for the integer instructions are included.
> 
> Tested and no new regressions.
> 
> OK for trunk?
> 
> Regards
> VP
> 
> ---
> 
> gcc/ChangeLog
> 
> 2013-08-20  Vidya Praveen  <vidyaprav...@arm.com>
> 
>         * config/aarch64/aarch64.md (unspec): Add UNSPEC_SISD_SSHL,
>         UNSPEC_SISD_USHL, UNSPEC_USHL_2S, UNSPEC_SSHL_2S, UNSPEC_SISD_NEG.
>         (<optab><mode>3_insn): Remove.
>         (aarch64_ashl_sisd_or_int_<mode>3): New Pattern.
>         (aarch64_lshr_sisd_or_int_<mode>3): Likewise.
>         (aarch64_ashr_sisd_or_int_<mode>3): Likewise.
>         (define_split for aarch64_lshr_sisd_or_int_di3): Likewise.
>         (define_split for aarch64_lshr_sisd_or_int_si3): Likewise.
>         (define_split for aarch64_ashr_sisd_or_int_di3): Likewise.
>         (define_split for aarch64_ashr_sisd_or_int_si3): Likewise.
>         (aarch64_sisd_ushl, aarch64_sisd_sshl): Likewise.
>         (aarch64_ushl_2s, aarch64_sshl_2s, aarch64_sisd_neg_qi): Likewise.
>         (ror<mode>3_insn): Likewise.
>         * config/aarch64/predicates.md (aarch64_simd_register): New.
> 
> gcc/testsuite/ChangeLog
> 
> 2013-08-20  Vidya Praveen  <vidyaprav...@arm.com>
> 
>         * gcc.target/aarch64/scalar_shift_1.c: New.
> 
> 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5312a79..07349c6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -88,11 +88,16 @@
     UNSPEC_NOP
     UNSPEC_PRLG_STK
     UNSPEC_RBIT
+    UNSPEC_SISD_NEG
+    UNSPEC_SISD_SSHL
+    UNSPEC_SISD_USHL
+    UNSPEC_SSHL_2S
     UNSPEC_ST2
     UNSPEC_ST3
     UNSPEC_ST4
     UNSPEC_TLS
     UNSPEC_TLSDESC
+    UNSPEC_USHL_2S
     UNSPEC_VSTRUCTDUMMY
 ])
 
@@ -3183,13 +3188,182 @@
   }
 )
 
-(define_insn "*<optab><mode>3_insn"
+;; Left shift: SISD shl (immediate), SISD ushl (register) or integer lsl.
+(define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+        (ashift:GPI
+          (match_operand:GPI 1 "register_operand" "w,w,r")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+  ""
+  "@
+   shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2
+   ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
+   lsl\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "simd" "yes,yes,no")  ;; first two alternatives are the "w" (SIMD) ones
+   (set_attr "simd_type" "simd_shift_imm,simd_shift,*")
+   (set_attr "simd_mode" "<MODE>,<MODE>,*")
+   (set_attr "v8type" "*,*,shift")  ;; integer attributes apply to the third alternative only
+   (set_attr "type" "*,*,shift")
+   (set_attr "mode" "*,*,<MODE>")]
+)
+
+;; Logical right shift: SISD ushr (immediate), "#" split (register) or integer lsr.
+(define_insn "*aarch64_lshr_sisd_or_int_<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+        (lshiftrt:GPI
+          (match_operand:GPI 1 "register_operand" "w,w,r")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+  ""
+  "@
+   ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
+   #
+   lsr\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "simd" "yes,yes,no")  ;; second alternative has no template; it must be split
+   (set_attr "simd_type" "simd_shift_imm,simd_shift,*")
+   (set_attr "simd_mode" "<MODE>,<MODE>,*")
+   (set_attr "v8type" "*,*,shift")  ;; integer attributes apply to the third alternative only
+   (set_attr "type" "*,*,shift")
+   (set_attr "mode" "*,*,<MODE>")]
+)
+
+(define_split  ;; DI logical right shift by a count held in a SIMD register
+  [(set (match_operand:DI 0 "aarch64_simd_register")
+        (lshiftrt:DI
+           (match_operand:DI 1 "aarch64_simd_register")
+           (match_operand:QI 2 "aarch64_simd_register")))]
+  "TARGET_SIMD && reload_completed"  ;; only after reload, when all operands are hard registers
+  [(set (match_dup 2)  ;; NOTE(review): negates the count register in place -- confirm it is dead afterwards
+        (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
+   (set (match_dup 0)  ;; USHL with the negated count
+        (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_USHL))]
+  ""
+)
+
+(define_split  ;; SI logical right shift by a count held in a SIMD register
+  [(set (match_operand:SI 0 "aarch64_simd_register")
+        (lshiftrt:SI
+           (match_operand:SI 1 "aarch64_simd_register")
+           (match_operand:QI 2 "aarch64_simd_register")))]
+  "TARGET_SIMD && reload_completed"  ;; only after reload, when all operands are hard registers
+  [(set (match_dup 2)  ;; NOTE(review): negates the count register in place -- confirm it is dead afterwards
+        (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
+   (set (match_dup 0)  ;; USHL on the 2S arrangement with the negated count
+        (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_USHL_2S))]
+  ""
+)
+
+;; Arithmetic right shift: SISD sshr (imm), "#" split (reg) or integer asr.
+(define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
+  [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+        (ashiftrt:GPI
+          (match_operand:GPI 1 "register_operand" "w,w,r")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+  ""
+  "@
+   sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
+   #
+   asr\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "simd" "yes,yes,no")  ;; second alternative has no template; it must be split
+   (set_attr "simd_type" "simd_shift_imm,simd_shift,*")
+   (set_attr "simd_mode" "<MODE>,<MODE>,*")
+   (set_attr "v8type" "*,*,shift")  ;; integer attributes apply to the third alternative only
+   (set_attr "type" "*,*,shift")
+   (set_attr "mode" "*,*,<MODE>")]
+)
+
+(define_split  ;; DI arithmetic right shift by a count held in a SIMD register
+  [(set (match_operand:DI 0 "aarch64_simd_register")
+        (ashiftrt:DI
+           (match_operand:DI 1 "aarch64_simd_register")
+           (match_operand:QI 2 "aarch64_simd_register")))]
+  "TARGET_SIMD && reload_completed"  ;; only after reload, when all operands are hard registers
+  [(set (match_dup 2)  ;; NOTE(review): negates the count register in place -- confirm it is dead afterwards
+        (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
+   (set (match_dup 0)  ;; SSHL with the negated count
+        (unspec:DI [(match_dup 1) (match_dup 2)] UNSPEC_SISD_SSHL))]
+  ""
+)
+
+(define_split  ;; SI arithmetic right shift by a count held in a SIMD register
+  [(set (match_operand:SI 0 "aarch64_simd_register")
+        (ashiftrt:SI
+           (match_operand:SI 1 "aarch64_simd_register")
+           (match_operand:QI 2 "aarch64_simd_register")))]
+  "TARGET_SIMD && reload_completed"  ;; only after reload, when all operands are hard registers
+  [(set (match_dup 2)  ;; NOTE(review): negates the count register in place -- confirm it is dead afterwards
+        (unspec:QI [(match_dup 2)] UNSPEC_SISD_NEG))
+   (set (match_dup 0)  ;; SSHL on the 2S arrangement with the negated count
+        (unspec:SI [(match_dup 1) (match_dup 2)] UNSPEC_SSHL_2S))]
+  ""
+)
+
+(define_insn "*aarch64_sisd_ushl"  ;; scalar (d-register) USHL by a register count
+  [(set (match_operand:DI 0 "register_operand" "=w")
+        (unspec:DI [(match_operand:DI 1 "register_operand" "w")
+                    (match_operand:QI 2 "register_operand" "w")]
+                   UNSPEC_SISD_USHL))]  ;; produced by the DI lshiftrt split
+  "TARGET_SIMD"
+  "ushl\t%d0, %d1, %d2"
+  [(set_attr "simd" "yes")
+   (set_attr "simd_type" "simd_shift")
+   (set_attr "simd_mode" "DI")]
+)
+
+(define_insn "*aarch64_ushl_2s"  ;; SImode USHL emitted on the .2s vector arrangement
+  [(set (match_operand:SI 0 "register_operand" "=w")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "w")
+                    (match_operand:QI 2 "register_operand" "w")]
+                   UNSPEC_USHL_2S))]  ;; produced by the SI lshiftrt split
+  "TARGET_SIMD"
+  "ushl\t%0.2s, %1.2s, %2.2s"
+  [(set_attr "simd" "yes")
+   (set_attr "simd_type" "simd_shift")
+   (set_attr "simd_mode" "DI")]
+)
+
+(define_insn "*aarch64_sisd_sshl"  ;; scalar (d-register) SSHL by a register count
+  [(set (match_operand:DI 0 "register_operand" "=w")
+        (unspec:DI [(match_operand:DI 1 "register_operand" "w")
+                    (match_operand:QI 2 "register_operand" "w")]
+                   UNSPEC_SISD_SSHL))]  ;; produced by the DI ashiftrt split
+  "TARGET_SIMD"
+  "sshl\t%d0, %d1, %d2"
+  [(set_attr "simd" "yes")
+   (set_attr "simd_type" "simd_shift")
+   (set_attr "simd_mode" "DI")]
+)
+
+(define_insn "*aarch64_sshl_2s"  ;; SImode SSHL emitted on the .2s vector arrangement
+  [(set (match_operand:SI 0 "register_operand" "=w")
+        (unspec:SI [(match_operand:SI 1 "register_operand" "w")
+                    (match_operand:QI 2 "register_operand" "w")]
+                   UNSPEC_SSHL_2S))]  ;; produced by the SI ashiftrt split
+  "TARGET_SIMD"
+  "sshl\t%0.2s, %1.2s, %2.2s"
+  [(set_attr "simd" "yes")
+   (set_attr "simd_type" "simd_shift")
+   (set_attr "simd_mode" "DI")]
+)
+
+(define_insn "*aarch64_sisd_neg_qi"  ;; negate a QI shift count held in a SIMD register
+  [(set (match_operand:QI 0 "register_operand" "=w")
+        (unspec:QI [(match_operand:QI 1 "register_operand" "w")]
+                   UNSPEC_SISD_NEG))]  ;; first insn emitted by each right-shift split
+  "TARGET_SIMD"
+  "neg\t%d0, %d1"
+  [(set_attr "simd" "yes")
+   (set_attr "simd_type" "simd_negabs")
+   (set_attr "simd_mode" "QI")]
+)
+
+;; Rotate right
+(define_insn "*ror<mode>3_insn"
   [(set (match_operand:GPI 0 "register_operand" "=r")
-	(SHIFT:GPI
-	 (match_operand:GPI 1 "register_operand" "r")
-	 (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))]
+        (rotatert:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>")))]
   ""
-  "<shift>\\t%<w>0, %<w>1, %<w>2"
+  "ror\\t%<w>0, %<w>1, %<w>2"
   [(set_attr "v8type" "shift")
    (set_attr "type" "shift")
    (set_attr "mode" "<MODE>")]
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3e2b6b3..dbc9082 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -26,6 +26,11 @@
 			      && GET_MODE_CLASS (GET_MODE (op)) == MODE_CC"))))
 )
 
+(define_predicate "aarch64_simd_register"  ;; a REG whose register class is FP_LO_REGS or FP_REGS
+  (and (match_code "reg")  ;; plain REG only; SUBREGs do not match
+       (ior (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_LO_REGS")
+            (match_test "REGNO_REG_CLASS (REGNO (op)) == FP_REGS"))))
+
 (define_predicate "aarch64_reg_or_zero"
   (and (match_code "reg,subreg,const_int")
        (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c b/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
new file mode 100644
index 0000000..7cb17f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/scalar_shift_1.c
@@ -0,0 +1,263 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-inline -save-temps" } */
+
+extern void abort (void);
+/* Pass V through a "w"-constrained register using an inline-asm mov.  */
+#define force_simd_di(v) asm volatile ("mov %d0, %1.d[0]" :"=w" (v) :"w" (v) :)
+#define force_simd_si(v) asm volatile ("mov %s0, %1.s[0]" :"=w" (v) :"w" (v) :)
+
+typedef unsigned long long int UInt64x1;  /* 64-bit unsigned test operand */
+typedef long long int Int64x1;  /* 64-bit signed test operand */
+typedef unsigned int UInt32x1;  /* 32-bit unsigned test operand */
+typedef int Int32x1;  /* 32-bit signed test operand */
+
+UInt64x1
+test_lshift_left_sisd_di (UInt64x1 b, UInt64x1 c)
+{
+  UInt64x1 a;
+  /* Operands go through "w" registers so both left shifts use SISD insns.  */
+  force_simd_di (b);
+  force_simd_di (c);
+  a = b << 8;  /* immediate count: shl on d registers expected */
+  a = a << c;  /* register count: ushl on d registers expected */
+  force_simd_di (a);
+  return a;
+}
+/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 8" } } */
+/* { dg-final { scan-assembler "ushl\td\[0-9\]+,\ d\[0-9\]+,\ d\[0-9\]+" } } */
+
+UInt32x1
+test_lshift_left_sisd_si (UInt32x1 b, UInt32x1 c)
+{
+  UInt32x1 a;
+  /* Operands go through "w" registers so both left shifts use SIMD insns.  */
+  force_simd_si (b);
+  force_simd_si (c);
+  a = b << 4;  /* immediate count: shl on .2s expected */
+  a = a << c;  /* register count: ushl on .2s expected */
+  force_simd_si (a);
+  return a;
+}
+/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 4" } } */
+/* "ushl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" (counted later) */
+
+UInt64x1
+test_lshift_right_sisd_di (UInt64x1 b, UInt64x1 c)
+{
+  UInt64x1 a;
+  /* Operands go through "w" registers so both right shifts use SISD insns.  */
+  force_simd_di (b);
+  force_simd_di (c);
+  a = b >> 8;  /* immediate count: ushr on d registers expected */
+  a = a >> c;  /* register count: neg followed by ushl expected */
+  force_simd_di (a);
+  return a;
+}
+/* { dg-final { scan-assembler "ushr\td\[0-9\]+,\ d\[0-9\]+,\ 8" } } */
+/* "neg\td\[0-9\]+,\ d\[0-9\]+" (counted later) */
+/* { dg-final { scan-assembler "ushl\td\[0-9\]+,\ d\[0-9\]+,\ d\[0-9\]+" } } */
+
+UInt32x1
+test_lshift_right_sisd_si (UInt32x1 b, UInt32x1 c)
+{
+  UInt32x1 a;
+  /* Operands go through "w" registers so both right shifts use SIMD insns.  */
+  force_simd_si (b);
+  force_simd_si (c);
+  a = b >> 4;  /* immediate count: ushr on .2s expected */
+  a = a >> c;  /* register count: neg followed by ushl on .2s expected */
+  force_simd_si (a);
+  return a;
+}
+/* { dg-final { scan-assembler "ushr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 4" } } */
+/* "neg\td\[0-9\]+,\ d\[0-9\]+" (counted later) */
+/* { dg-final { scan-assembler-times "ushl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" 2 } } */
+
+Int64x1
+test_ashift_right_sisd_di (Int64x1 b, Int64x1 c)
+{
+  Int64x1 a;
+  /* Operands go through "w" registers so both right shifts use SISD insns.  */
+  force_simd_di (b);
+  force_simd_di (c);
+  a = b >> 8;  /* immediate count: sshr on d registers expected */
+  a = a >> c;  /* register count: neg followed by sshl expected */
+  force_simd_di (a);
+  return a;
+}
+/* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 8" } } */
+/* "neg\td\[0-9\]+,\ d\[0-9\]+" (counted later) */
+/* { dg-final { scan-assembler "sshl\td\[0-9\]+,\ d\[0-9\]+,\ d\[0-9\]+" } } */
+
+Int32x1
+test_ashift_right_sisd_si (Int32x1 b, Int32x1 c)
+{
+  Int32x1 a;
+  /* Operands go through "w" registers so both right shifts use SIMD insns.  */
+  force_simd_si (b);
+  force_simd_si (c);
+  a = b >> 4;  /* immediate count: sshr on .2s expected */
+  a = a >> c;  /* register count: neg followed by sshl on .2s expected */
+  force_simd_si (a);
+  return a;
+}
+/* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 4" } } */
+/* { dg-final { scan-assembler-times "neg\td\[0-9\]+,\ d\[0-9\]+" 4 } } */
+/* { dg-final { scan-assembler "sshl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" } } */
+
+
+/* The following make sure that the integer instructions lsl/lsr/asr are
+   generated in non-vector scenarios.  */
+
+UInt64x1
+test_lshift_left_int_di (UInt64x1 b, UInt64x1 c)
+{
+  UInt64x1 a;
+  /* No force_simd here: integer lsl on x registers is expected.  */
+  a = b << 8;  /* immediate count */
+  a = a << c;  /* register count */
+  return a;
+}
+/* { dg-final { scan-assembler "lsl\tx\[0-9\]+,\ x\[0-9\]+,\ 8" } } */
+/* { dg-final { scan-assembler "lsl\tx\[0-9\]+,\ x\[0-9\]+,\ x\[0-9\]+" } } */
+
+UInt32x1
+test_lshift_left_int_si (UInt32x1 b, UInt32x1 c)
+{
+  UInt32x1 a;
+  /* No force_simd here: integer lsl on w registers is expected.  */
+  a = b << 4;  /* immediate count */
+  a = a << c;  /* register count */
+  return a;
+}
+/* { dg-final { scan-assembler "lsl\tw\[0-9\]+,\ w\[0-9\]+,\ 4" } } */
+/* { dg-final { scan-assembler "lsl\tw\[0-9\]+,\ w\[0-9\]+,\ w\[0-9\]+" } } */
+
+UInt64x1
+test_lshift_right_int_di (UInt64x1 b, UInt64x1 c)
+{
+  UInt64x1 a;
+  /* No force_simd here: integer lsr on x registers is expected.  */
+  a = b >> 8;  /* immediate count */
+  a = a >> c;  /* register count */
+  return a;
+}
+/* { dg-final { scan-assembler "lsr\tx\[0-9\]+,\ x\[0-9\]+,\ 8" } } */
+/* { dg-final { scan-assembler "lsr\tx\[0-9\]+,\ x\[0-9\]+,\ x\[0-9\]+" } } */
+
+UInt32x1
+test_lshift_right_int_si (UInt32x1 b, UInt32x1 c)
+{
+  UInt32x1 a;
+  /* No force_simd here: integer lsr on w registers is expected.  */
+  a = b >> 4;  /* immediate count */
+  a = a >> c;  /* register count */
+  return a;
+}
+/* { dg-final { scan-assembler "lsr\tw\[0-9\]+,\ w\[0-9\]+,\ 4" } } */
+/* { dg-final { scan-assembler "lsr\tw\[0-9\]+,\ w\[0-9\]+,\ w\[0-9\]+" } } */
+
+Int64x1
+test_ashift_right_int_di (Int64x1 b, Int64x1 c)
+{
+  Int64x1 a;
+  /* No force_simd here: integer asr on x registers is expected.  */
+  a = b >> 8;  /* immediate count */
+  a = a >> c;  /* register count */
+  return a;
+}
+/* { dg-final { scan-assembler "asr\tx\[0-9\]+,\ x\[0-9\]+,\ 8" } } */
+/* { dg-final { scan-assembler "asr\tx\[0-9\]+,\ x\[0-9\]+,\ x\[0-9\]+" } } */
+
+Int32x1
+test_ashift_right_int_si (Int32x1 b, Int32x1 c)
+{
+  Int32x1 a;
+  /* No force_simd here: integer asr on w registers is expected.  */
+  a = b >> 4;  /* immediate count */
+  a = a >> c;  /* register count */
+  return a;
+}
+/* { dg-final { scan-assembler "asr\tw\[0-9\]+,\ w\[0-9\]+,\ 4" } } */
+/* { dg-final { scan-assembler "asr\tw\[0-9\]+,\ w\[0-9\]+,\ w\[0-9\]+" } } */
+
+Int64x1
+test_corners_sisd_di (Int64x1 b)
+{
+  force_simd_di (b);  /* keep b in a "w" register for the shifts below */
+  b = b >> 63;  /* largest in-range count for 64 bits: sshr by 63 expected */
+  b = b >> 0;  /* zero count */
+  b += b >> 65; /* { dg-warning "right shift count >= width of type" } */
+  force_simd_di (b);
+
+  return b;
+}
+/* { dg-final { scan-assembler "sshr\td\[0-9\]+,\ d\[0-9\]+,\ 63" } } */
+/* { dg-final { scan-assembler "shl\td\[0-9\]+,\ d\[0-9\]+,\ 1" } } */
+
+Int32x1
+test_corners_sisd_si (Int32x1 b)
+{
+  force_simd_si (b);  /* keep b in a "w" register for the shifts below */
+  b = b >> 31;  /* largest in-range count for 32 bits: sshr by 31 expected */
+  b = b >> 0;  /* zero count */
+  b += b >> 33; /* { dg-warning "right shift count >= width of type" } */
+  force_simd_si (b);
+
+  return b;
+}
+/* { dg-final { scan-assembler "sshr\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 31" } } */
+/* { dg-final { scan-assembler "shl\tv\[0-9\]+\.2s,\ v\[0-9\]+\.2s,\ 1" } } */
+
+
+/* Abort unless VAR equals VAL; both arguments are parenthesized on use.  */
+#define CHECK(var,val) \
+do                     \
+  {                    \
+    if ((var) != (val)) \
+      abort();         \
+  }                    \
+while(0)
+
+UInt64x1 x = 0xC01dDeadBeefFaceull;  /* value threaded through the 64-bit tests in main */
+UInt32x1 y = 0xDeadBeef;  /* value threaded through the 32-bit tests in main */
+
+int
+main ()
+{
+  x = test_lshift_left_sisd_di (x, 8);  /* DI shift helpers apply 8 (constant) + 8 (argument) bits */
+  CHECK (x, 0xdeadbeefface0000ull);
+  x = test_lshift_right_int_di (x, 8);
+  CHECK (x, 0x0000deadbeeffaceull);
+  x = test_lshift_right_sisd_di (x, 8);
+  CHECK (x, 0x00000000deadbeefull);
+  x = test_lshift_left_int_di (x, 8);
+  CHECK (x, 0x0000deadbeef0000ull);
+  x = ~x;  /* sets the top bit so the arithmetic shifts below sign-extend */
+  x = test_ashift_right_int_di (x, 8);
+  CHECK (x, 0xffffffff21524110ull);
+  x = test_ashift_right_sisd_di (x, 8);
+  CHECK (x, 0xffffffffffff2152ull);
+  x = test_corners_sisd_di (x);
+  CHECK (x, 0xfffffffffffffffeull);
+
+  y = test_lshift_left_sisd_si (y, 4);  /* SI shift helpers apply 4 (constant) + 4 (argument) bits */
+  CHECK (y, 0xadbeef00);
+  y = test_lshift_right_int_si (y, 4);
+  CHECK (y, 0x00adbeef);
+  y = test_lshift_right_sisd_si (y, 4);
+  CHECK (y, 0x0000adbe);
+  y = test_lshift_left_int_si (y, 4);
+  CHECK (y, 0x00adbe00);
+  y = ~y;  /* sets the top bit so the arithmetic shifts below sign-extend */
+  y = test_ashift_right_int_si (y, 4);
+  CHECK (y, 0xffff5241);
+  y = test_ashift_right_sisd_si (y, 4);
+  CHECK (y, 0xffffff52);
+  y = test_corners_sisd_si (y);
+  CHECK (y, 0xfffffffe);
+
+  return 0;  /* reached only if every CHECK passed */
+}
+
+/* { dg-final { cleanup-saved-temps } } */

Reply via email to