Hi Marcus,
Thanks for reviewing the patch and for your comments.
>> Please drop the trailing white space, here and on the following
>> patterns.
The trailing whitespace appears only in the patch itself. Once the
patch is applied to the sources, there is no trailing whitespace.
>> GNU style, spaces around '=' and '<'.
Sorry for missing the spacing. Spaces have been added around the
operators wherever needed.
Please review the modified patch and let me know if any further
modifications are needed.
Thanks,
Naveen
--- gcc/config/aarch64/aarch64-simd.md 2013-02-20 13:24:21.608042549 +0530
+++ gcc/config/aarch64/aarch64-simd.md 2013-02-27 10:26:47.367204006 +0530
@@ -44,6 +44,7 @@
; simd_dup duplicate element.
; simd_dupgp duplicate general purpose register.
; simd_ext bitwise extract from pair.
+; simd_fabd floating absolute difference.
; simd_fadd floating point add/sub.
; simd_fcmp floating point compare.
; simd_fcvti floating point convert to integer.
@@ -147,6 +148,7 @@
simd_dup,\
simd_dupgp,\
simd_ext,\
+ simd_fabd,\
simd_fadd,\
simd_fcmp,\
simd_fcvti,\
@@ -520,6 +522,40 @@
(set_attr "simd_mode" "<MODE>")]
)
+(define_insn "abd<mode>_3" ; operand 0 = abs (operand 1 - operand 2); emits sabd
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+ (abs:VDQ_BHSI (minus:VDQ_BHSI
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
+ (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
+ "TARGET_SIMD" ; pattern is only enabled when TARGET_SIMD holds
+ "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "simd_type" "simd_abd")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "aba<mode>_3" ; operand 0 = abs (operand 1 - operand 2) + operand 3; emits saba
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+ (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
+ (match_operand:VDQ_BHSI 1 "register_operand" "w")
+ (match_operand:VDQ_BHSI 2 "register_operand" "w")))
+ (match_operand:VDQ_BHSI 3 "register_operand" "0")))] ; matching constraint 0 ties the addend to operand 0
+ "TARGET_SIMD" ; pattern is only enabled when TARGET_SIMD holds
+ "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" ; operand 3 is not printed because it shares the register of operand 0
+ [(set_attr "simd_type" "simd_abd")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "fabd<mode>_3" ; operand 0 = abs (operand 1 - operand 2) on VDQF modes; emits fabd
+ [(set (match_operand:VDQF 0 "register_operand" "=w")
+ (abs:VDQF (minus:VDQF
+ (match_operand:VDQF 1 "register_operand" "w")
+ (match_operand:VDQF 2 "register_operand" "w"))))]
+ "TARGET_SIMD" ; pattern is only enabled when TARGET_SIMD holds
+ "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "simd_type" "simd_fabd")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
(define_insn "and<mode>3"
[(set (match_operand:VDQ 0 "register_operand" "=w")
(and:VDQ (match_operand:VDQ 1 "register_operand" "w")
--- gcc/testsuite/gcc.target/aarch64/vect.c 2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.c 2013-02-27 10:21:27.991945664 +0530
@@ -55,6 +55,8 @@ int main (void)
int smin_vector[] = {0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15};
unsigned int umax_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
unsigned int umin_vector[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+ int sabd_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ int saba_vector[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
int reduce_smax_value = 0;
int reduce_smin_value = -15;
unsigned int reduce_umax_value = 15;
@@ -81,6 +83,8 @@ int main (void)
TEST (smin, s);
TEST (umax, u);
TEST (umin, u);
+ TEST (sabd, s);
+ TEST (saba, s);
TESTV (reduce_smax, s);
TESTV (reduce_smin, s);
TESTV (reduce_umax, u);
--- gcc/testsuite/gcc.target/aarch64/vect-compile.c 2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-compile.c 2013-02-27 10:21:27.991945664 +0530
@@ -16,5 +16,7 @@
/* { dg-final { scan-assembler "uminv" } } */
/* { dg-final { scan-assembler "smaxv" } } */
/* { dg-final { scan-assembler "sminv" } } */
+/* { dg-final { scan-assembler "sabd" } } */
+/* { dg-final { scan-assembler "saba" } } */
/* { dg-final { scan-assembler-times "addv" 2} } */
/* { dg-final { scan-assembler-times "addp" 2} } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.c 2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.c 2013-02-27 10:21:27.995943664 +0530
@@ -117,6 +117,16 @@ int main (void)
9.0, 10.0, 11.0, 12.0,
13.0, 14.0, 15.0, 16.0 };
+ F32 fabd_F32_vector[] = { 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 1.0f,
+ 1.0f, 1.0f, 1.0f, 1.0f };
+
+ F64 fabd_F64_vector[] = { 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0,
+ 1.0, 1.0, 1.0, 1.0 };
+
/* Setup input vectors. */
for (i=1; i<=16; i++)
{
@@ -132,6 +142,7 @@ int main (void)
TEST (div, 3);
TEST (neg, 2);
TEST (abs, 2);
+ TEST (fabd, 3);
return 0;
}
--- gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c 2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp-compile.c 2013-02-27 10:21:27.995943664 +0530
@@ -11,3 +11,4 @@
/* { dg-final { scan-assembler "fdiv\\tv" } } */
/* { dg-final { scan-assembler "fneg\\tv" } } */
/* { dg-final { scan-assembler "fabs\\tv" } } */
+/* { dg-final { scan-assembler "fabd\\tv" } } */
--- gcc/testsuite/gcc.target/aarch64/vect-fp.x 2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect-fp.x 2013-02-27 10:25:58.231203919 +0530
@@ -7,13 +7,23 @@ typedef double *__restrict__ pRF64;
extern float fabsf (float);
extern double fabs (double);
+#define DEF3a(fname, type, op) /* Defines fname_type: a[i] = op (b[i] - c[i]) for 16 elements. */ \
+ void fname##_##type (pR##type a, \
+ pR##type b, \
+ pR##type c) \
+ { \
+ int i; \
+ for (i = 0; i < 16; i++) \
+ a[i] = op (b[i] - c[i]); /* op is applied to the difference, e.g. fabsf or fabs. */ \
+ }
+
#define DEF3(fname, type, op) \
void fname##_##type (pR##type a, \
pR##type b, \
pR##type c) \
{ \
int i; \
- for (i=0; i<16; i++) \
+ for (i = 0; i < 16; i++) \
a[i] = b[i] op c[i]; \
}
@@ -22,11 +32,15 @@ extern double fabs (double);
pR##type b) \
{ \
int i; \
- for (i=0; i<16; i++) \
+ for (i = 0; i < 16; i++) \
a[i] = op(b[i]); \
}
+#define DEFN3a(fname, op) /* Instantiates DEF3a for both F32 and F64 with the same op. */ \
+ DEF3a (fname, F32, op) \
+ DEF3a (fname, F64, op) /* NOTE(review): fabd is instantiated with DEF3a directly (fabsf/fabs differ per type); confirm this wrapper is needed. */
+
#define DEFN3(fname, op) \
DEF3 (fname, F32, op) \
DEF3 (fname, F64, op)
@@ -42,3 +56,5 @@ DEFN3 (div, /)
DEFN2 (neg, -)
DEF2 (abs, F32, fabsf)
DEF2 (abs, F64, fabs)
+DEF3a (fabd, F32, fabsf)
+DEF3a (fabd, F64, fabs)
--- gcc/testsuite/gcc.target/aarch64/vect.x 2013-02-20 13:24:22.140042557 +0530
+++ gcc/testsuite/gcc.target/aarch64/vect.x 2013-02-27 10:25:49.451203905 +0530
@@ -138,3 +138,18 @@ long long reduce_add_s64 (pRINT64 a)
return s;
}
+
+void sabd (pRINT a, pRINT b, pRINT c) /* Stores abs (a[i] - b[i]) into c[i] for 16 elements. */
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ c[i] = abs (a[i] - b[i]); /* NOTE(review): assumes abs is declared earlier in this file; confirm. */
+}
+
+void saba (pRINT a, pRINT b, pRINT c) /* Adds abs (a[i] - b[i]) onto c[i] for 16 elements. */
+{
+ int i;
+ for (i = 0; i < 16; i++)
+ c[i] += abs (a[i] - b[i]); /* c is both read and written: the accumulate form. */
+}
+