Re: [Patch,AArch64] Support SADDL/SSUBL/UADDL/USUBL

2013-10-01 Thread Marcus Shawcroft
> 2013-09-30  Vidya Praveen  <vidyapraveen@arm.com>
>
> * aarch64-simd.md
> (aarch64_<ANY_EXTEND:su><ADDSUB:optab>l2<mode>_internal): Rename to 
> ...
> (aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal): ... this;
> Insert '\t' to output template.
> (aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal): New.
> (aarch64_saddl2<mode>, aarch64_uaddl2<mode>): Modify to call
> gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal() 
> instead.
> (aarch64_ssubl2<mode>, aarch64_usubl2<mode>): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> 2013-09-30  Vidya Praveen  <vidyapraveen@arm.com>
>
> * gcc.target/aarch64/vect_saddl_1.c: New.

OK /Marcus


[Patch,AArch64] Support SADDL/SSUBL/UADDL/USUBL

2013-09-30 Thread Vidya Praveen
Hello,

This patch adds support to generate SADDL/SSUBL/UADDL/USUBL. Part of the support
is available already (supported for intrinsics). This patch extends this support
to generate these instructions (and lane variations) in all scenarios and adds a
testcase. Tested for aarch64-none-elf, aarch64_be-none-elf with no regressions.

OK for trunk?

Cheers
VP

~~~

gcc/ChangeLog:

2013-09-30  Vidya Praveen  <vidyapraveen@arm.com>

* aarch64-simd.md 
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>l2<mode>_internal): Rename to ...
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal): ... this;
Insert '\t' to output template.
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal): New.
(aarch64_saddl2<mode>, aarch64_uaddl2<mode>): Modify to call 
gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal() instead.
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>): Ditto.

gcc/testsuite/ChangeLog:

2013-09-30  Vidya Praveen  <vidyapraveen@arm.com>

* gcc.target/aarch64/vect_saddl_1.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f13cd5b..a0259b8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2586,7 +2586,7 @@
 
 ;; <su><addsub>l<q>.
 
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l2<mode>_internal"
+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
 			   (match_operand:VQW 1 "register_operand" "w")
@@ -2595,11 +2595,26 @@
 			   (match_operand:VQW 2 "register_operand" "w")
 			   (match_dup 3)))))]
   "TARGET_SIMD"
-  "<ANY_EXTEND:su><ADDSUB:optab>l2 %0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
   [(set_attr "simd_type" "simd_addl")
    (set_attr "simd_mode" "<MODE>")]
 )
 
+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+			   (match_operand:VQW 1 "register_operand" "w")
+			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+			   (match_operand:VQW 2 "register_operand" "w")
+			   (match_dup 3)))))]
+  "TARGET_SIMD"
+  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
+  [(set_attr "simd_type" "simd_addl")
+   (set_attr "simd_mode" "<MODE>")]
+)
+
+
 (define_expand "aarch64_saddl2<mode>"
   [(match_operand:<VWIDE> 0 "register_operand" "=w")
    (match_operand:VQW 1 "register_operand" "w")
@@ -2607,8 +2622,8 @@
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  emit_insn (gen_aarch64_saddl2<mode>_internal (operands[0], operands[1],
-		operands[2], p));
+  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
+						  operands[2], p));
   DONE;
 })
 
@@ -2619,8 +2634,8 @@
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  emit_insn (gen_aarch64_uaddl2<mode>_internal (operands[0], operands[1],
-		operands[2], p));
+  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
+						  operands[2], p));
   DONE;
 })
 
@@ -2631,7 +2646,7 @@
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  emit_insn (gen_aarch64_ssubl2<mode>_internal (operands[0], operands[1],
+  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
 		operands[2], p));
   DONE;
 })
@@ -2643,7 +2658,7 @@
   "TARGET_SIMD"
 {
   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
-  emit_insn (gen_aarch64_usubl2<mode>_internal (operands[0], operands[1],
+  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
 		operands[2], p));
   DONE;
 })
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c b/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c
new file mode 100644
index 0000000..ecbd8a8 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c
@@ -0,0 +1,315 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model" } */
+
+typedef signed char S8_t;
+typedef signed short S16_t;
+typedef signed int S32_t;
+typedef signed long long S64_t;
+
+typedef signed char *__restrict__ pS8_t;
+typedef signed short *__restrict__ pS16_t;
+typedef signed int *__restrict__ pS32_t;
+typedef signed long long *__restrict__ pS64_t;
+
+typedef unsigned char U8_t;
+typedef unsigned short U16_t;
+typedef unsigned int U32_t;
+typedef unsigned long long U64_t;
+
+typedef unsigned char *__restrict__ pU8_t;
+typedef unsigned short *__restrict__ pU16_t;
+typedef unsigned int *__restrict__ pU32_t;
+typedef unsigned long long *__restrict__ pU64_t;
+
+extern void abort ();
+
+void
+test_addl_S64_S32_4 (pS64_t a, pS32_t b, pS32_t c)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    a[i] = (S64_t) b[i] + (S64_t) c[i];
+}
+/* "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
+/* "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
+
+/* a = -b + c => a = c - b */
+void
+test_addl_S64_S32_4_neg0 (pS64_t a, pS32_t b, pS32_t c)
+{
+  int i;
+  for (i = 0; i < 4; i++)
+    a[i] = -(S64_t) b[i] + (S64_t) c[i];
+}
+/* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
+/* "ssubl2\tv\[0-9\]+