Hello,
This patch adds support for generating SADDL/SSUBL/UADDL/USUBL. Part of this
support already exists (for intrinsics). This patch extends it so that these
instructions (and their lane variations) are generated in all scenarios, and adds
a testcase. Tested on aarch64-none-elf and aarch64_be-none-elf with no regressions.
OK for trunk?
Cheers
VP
~~~
gcc/ChangeLog:
2013-09-30 Vidya Praveen
* aarch64-simd.md
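(aarch64_<ANY_EXTEND:su><ADDSUB:optab>l2<mode>_internal): Rename to ...
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal): ... this;
Insert '\t' to output template.
(aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal): New.
(aarch64_saddl2<mode>, aarch64_uaddl2<mode>): Modify to call
gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal() instead.
(aarch64_ssubl2<mode>, aarch64_usubl2<mode>): Ditto.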
gcc/testsuite/ChangeLog:
2013-09-30 Vidya Praveen
* gcc.target/aarch64/vect_saddl_1.c: New.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index f13cd5b..a0259b8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2586,7 +2586,7 @@
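;; <su><addsub>l<q>.
-(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l2<mode>_internal"
+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>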
(match_operand:VQW 1 "register_operand" "w")
@@ -2595,11 +2595,26 @@
(match_operand:VQW 2 "register_operand" "w")
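(match_dup 3)))))]
"TARGET_SIMD"
- "<ANY_EXTEND:su><ADDSUB:optab>l2 %0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+ "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "simd_type" "simd_addl")
(set_attr "simd_mode" "<MODE>")]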
)
+(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 1 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_dup 3)))))]
+ "TARGET_SIMD"
+ "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
+ [(set_attr "simd_type" "simd_addl")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+
(define_expand "aarch64_saddl2<mode>"
[(match_operand:<VWIDE> 0 "register_operand" "=w")
(match_operand:VQW 1 "register_operand" "w")
@@ -2607,8 +2622,8 @@
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- emit_insn (gen_aarch64_saddl2<mode>_internal (operands[0], operands[1],
- operands[2], p));
+ emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
+ operands[2], p));
DONE;
})
@@ -2619,8 +2634,8 @@
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- emit_insn (gen_aarch64_uaddl2<mode>_internal (operands[0], operands[1],
- operands[2], p));
+ emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
+ operands[2], p));
DONE;
})
@@ -2631,7 +2646,7 @@
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- emit_insn (gen_aarch64_ssubl2<mode>_internal (operands[0], operands[1],
+ emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
@@ -2643,7 +2658,7 @@
"TARGET_SIMD"
{
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
- emit_insn (gen_aarch64_usubl2<mode>_internal (operands[0], operands[1],
+ emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
operands[2], p));
DONE;
})
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c b/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c
new file mode 100644
index 000..ecbd8a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_saddl_1.c
@@ -0,0 +1,315 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -fno-inline -save-temps -fno-vect-cost-model" } */
+
+typedef signed char S8_t;
+typedef signed short S16_t;
+typedef signed int S32_t;
+typedef signed long long S64_t;
+
+typedef signed char *__restrict__ pS8_t;
+typedef signed short *__restrict__ pS16_t;
+typedef signed int *__restrict__ pS32_t;
+typedef signed long long *__restrict__ pS64_t;
+
+typedef unsigned char U8_t;
+typedef unsigned short U16_t;
+typedef unsigned int U32_t;
+typedef unsigned long long U64_t;
+
+typedef unsigned char *__restrict__ pU8_t;
+typedef unsigned short *__restrict__ pU16_t;
+typedef unsigned int *__restrict__ pU32_t;
+typedef unsigned long long *__restrict__ pU64_t;
+
+extern void abort ();
+
+void
+test_addl_S64_S32_4 (pS64_t a, pS32_t b, pS32_t c)
+{
+ int i;
+ for (i = 0; i < 4; i++)
+a[i] = (S64_t) b[i] + (S64_t) c[i];
+}
+/* "saddl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
+/* "saddl2\tv\[0-9\]+\.2d,\ v\[0-9\]+\.4s,\ v\[0-9\]+\.4s" */
+
+/* a = -b + c => a = c - b */
+void
+test_addl_S64_S32_4_neg0 (pS64_t a, pS32_t b, pS32_t c)
+{
+ int i;
+ for (i = 0; i < 4; i++)
+a[i] = -(S64_t) b[i] + (S64_t) c[i];
+}
+/* "ssubl\tv\[0-9\]+\.2d,\ v\[0-9\]+\.2s,\ v\[0-9\]+\.2s" */
+/* "ssubl2\tv\[0-9\]+