Re: [[ARM/AArch64][testsuite] 06/36] Add vmla and vmls tests.

2015-01-20 Thread Christophe Lyon
On 19 January 2015 at 14:35, Marcus Shawcroft wrote:
> On 13 January 2015 at 15:18, Christophe Lyon wrote:
>>
>> * gcc.target/aarch64/advsimd-intrinsics/vmlX.inc: New file.
>> * gcc.target/aarch64/advsimd-intrinsics/vmla.c: New file.
>> * gcc.target/aarch64/advsimd-intrinsics/vmls.c: New file.
>
> OK with the vmlx poly ops dropped /M

Thanks, here is what I have committed (I removed the 64-bit element
vectors, in addition to the poly ones).
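For readers less familiar with the testsuite macros, here is a minimal,
self-contained sketch of roughly what one TEST_VMLX instance below boils down
to for the int8x8_t case (FNNAME(INSN_NAME) just token-pastes the intrinsic
name into exec_vmla / exec_vmls).  The buffer contents and variable names are
illustrative placeholders, not the values provided by compute-ref-data.h:

/* Illustrative sketch only: approximately what
   TEST_VMLX(INSN_NAME, , int, s, 8, 8) does when INSN_NAME is vmla.  */
#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  int8_t buffer[8] = { -16, -15, -14, -13, -12, -11, -10, -9 };  /* made-up inputs */
  int8_t result[8];

  int8x8_t vector1 = vld1_s8 (buffer);           /* VLOAD step */
  int8x8_t vector2 = vdup_n_s8 (0x11);           /* VDUP step  */
  int8x8_t vector3 = vdup_n_s8 ((int8_t) 0xFF);  /* VDUP step  */

  /* vmla_s8 computes vector1 + vector2 * vector3, element by element.  */
  int8x8_t vector_res = vmla_s8 (vector1, vector2, vector3);

  vst1_s8 (result, vector_res);                  /* stored, then checked
                                                    against the expected arrays */
  for (int i = 0; i < 8; i++)
    printf ("0x%02x ", (unsigned char) result[i]);
  printf ("\n");
  return 0;
}

Note that arm_neon.h provides no 64-bit-element or polynomial forms of
vmla/vmls, which is why those variants have no counterpart in the test.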

Christophe
Index: gcc/testsuite/ChangeLog
===
--- gcc/testsuite/ChangeLog	(revision 219916)
+++ gcc/testsuite/ChangeLog	(working copy)
@@ -1,5 +1,11 @@
 2015-01-20  Christophe Lyon  
 
+	* gcc.target/aarch64/advsimd-intrinsics/vmlX.inc: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmla.c: New file.
+	* gcc.target/aarch64/advsimd-intrinsics/vmls.c: New file.
+
+2015-01-20  Christophe Lyon  
+
 	* gcc.target/aarch64/advsimd-intrinsics/vldX_dup.c: New file.
 
 2015-01-20  Jakub Jelinek  
Index: gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc
===
--- gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc	(revision 0)
+++ gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc	(working copy)
@@ -0,0 +1,123 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMLX(T, W, N)			\
+  DECL_VARIABLE(vector1, T, W, N);		\
+  DECL_VARIABLE(vector2, T, W, N);		\
+  DECL_VARIABLE(vector3, T, W, N);		\
+  DECL_VARIABLE(vector_res, T, W, N)
+
+  /* vector_res = vmla(vector1, vector2, vector3),
+     then store the result.  */
+#define TEST_VMLX1(INSN, Q, T1, T2, W, N)\
+  VECT_VAR(vector_res, T1, W, N) =  \
+INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N),			\
+		  VECT_VAR(vector2, T1, W, N),			\
+		  VECT_VAR(vector3, T1, W, N));			\
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
+		VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLX(INSN, Q, T1, T2, W, N)	\
+  TEST_VMLX1(INSN, Q, T1, T2, W, N)
+
+  DECL_VMLX(int, 8, 8);
+  DECL_VMLX(int, 16, 4);
+  DECL_VMLX(int, 32, 2);
+  DECL_VMLX(uint, 8, 8);
+  DECL_VMLX(uint, 16, 4);
+  DECL_VMLX(uint, 32, 2);
+  DECL_VMLX(float, 32, 2);
+  DECL_VMLX(int, 8, 16);
+  DECL_VMLX(int, 16, 8);
+  DECL_VMLX(int, 32, 4);
+  DECL_VMLX(uint, 8, 16);
+  DECL_VMLX(uint, 16, 8);
+  DECL_VMLX(uint, 32, 4);
+  DECL_VMLX(float, 32, 4);
+
+  clean_results ();
+
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x44);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.1f);
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, float, f, 32, 4, 99.2f);
+
+  VDUP(vector3, , int, s, 8, 8, 0xFF);
+  VDUP(vector3, , int, s, 16, 4, 0xEE);
+  VDUP(vector3, , int, s, 32, 2, 0xDD);
+  VDUP(vector3, , uint, u, 8, 8, 0xCC);
+  VDUP(vector3, , uint, u, 16, 4, 0xBB);
+  VDUP(vector3, , uint, u, 32, 2, 0xAA);
+  VDUP(vector3, , float, f, 32, 2, 10.23f);
+  VDUP(vector3, q, int, s, 8, 16, 0x99);
+  VDUP(vector3, q, int, s, 16, 8, 0x88);
+  VDUP(vector3, q, int, s, 32, 4, 0x77);
+  VDUP(vector3, q, uint, u, 8, 16, 0x66);
+  VDUP(vector3, q, uint, u, 16, 8, 0x55);
+  VDUP(vector3, q, uint, u, 32, 4, 0x44);
+  VDUP(vector3, q, float, f, 32, 4, 77.8f);
+
+  TEST_VMLX(INSN_NAME, , int, s, 8, 8);
+  TEST_VMLX(INSN_NAME, , int, s, 16, 4);
+  TEST_VMLX(INSN_NAME, , int, s, 32, 2);
+  TEST_VMLX(INSN_NAME, , uint, u, 8, 8);
+  TEST_VMLX(INSN_NAME, , uint, u, 16, 4);
+  TEST_VMLX(INSN_NAME, , uint, u, 32, 2);
+  TEST_VMLX(INSN_NAME, , float, f, 32, 2);
+  TEST_VMLX(INSN_NAME, q, int, s, 8, 16);
+  TEST_VMLX(INSN_NAME, q, int, s, 16, 8);
+  TEST_VMLX(INSN_NAME, q, int, s, 32, 4);
+  TEST_VMLX(INSN_NAME, q, uint, u, 8, 16);
+  TEST_VMLX(INSN_NAME, q, uint, u, 16, 8);
+  TEST_VMLX(INSN_NAME, q, uint, u, 32, 4);
+  TEST_VMLX(INSN_NAME, 

Re: [[ARM/AArch64][testsuite] 06/36] Add vmla and vmls tests.

2015-01-19 Thread Marcus Shawcroft
On 13 January 2015 at 15:18, Christophe Lyon wrote:
>
> * gcc.target/aarch64/advsimd-intrinsics/vmlX.inc: New file.
> * gcc.target/aarch64/advsimd-intrinsics/vmla.c: New file.
> * gcc.target/aarch64/advsimd-intrinsics/vmls.c: New file.

OK with the vmlx poly ops dropped /M


Re: [[ARM/AArch64][testsuite] 06/36] Add vmla and vmls tests.

2015-01-16 Thread Christophe Lyon
On 16 January 2015 at 16:35, Tejas Belagod wrote:
>> +VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
>> +0x33, 0x33, 0x33, 0x33,
>> +0x33, 0x33, 0x33, 0x33,
>> +0x33, 0x33, 0x33, 0x33 };
>> +VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
>> +0x3333, 0x3333, 0x3333, 0x3333 };
>
>
> No poly ops for vmlx.
>
Indeed, it's not used; I shall remove it.

>> +VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x45f0ae15, 0x45f0b615,
>> +  0x45f0be15, 0x45f0c615 };
>> +
>
> These expected results are calculated using chained (as opposed to fused)
> float MACs, right?
>
IIRC, yes.
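To make the distinction concrete, a small hedged sketch (with made-up input
values, not the testsuite's buffers): a chained MAC rounds the product to
float and then rounds the sum again, whereas a fused MAC (what an fmla/vfma
instruction does) applies a single rounding to the exact a + b * c, so the
two can differ in the last bit:

/* Chained vs. fused float multiply-accumulate; inputs are illustrative.  */
#include <math.h>
#include <stdio.h>

int main (void)
{
  float a = -14.0f;   /* accumulator element (vector1)  */
  float b = 33.1f;    /* multiplicand element (vector2) */
  float c = 10.23f;   /* multiplier element (vector3)   */

  float prod = b * c;             /* rounded to float                        */
  float chained = a + prod;       /* rounded again: what the tables assume   */
  float fused = fmaf (b, c, a);   /* single rounding of the exact a + b*c    */

  printf ("chained = %.9g  fused = %.9g\n", chained, fused);
  return 0;
}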

> Otherwise, LGTM.
>
> Tejas.
>


Re: [[ARM/AArch64][testsuite] 06/36] Add vmla and vmls tests.

2015-01-16 Thread Tejas Belagod

+VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
+0x33, 0x33, 0x33, 0x33,
+0x33, 0x33, 0x33, 0x33,
+0x33, 0x33, 0x33, 0x33 };
+VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
+0x3333, 0x3333, 0x3333, 0x3333 };


No poly ops for vmlx.


+VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x45f0ae15, 0x45f0b615,
+  0x45f0be15, 0x45f0c615 };
+


These expected results are calculated using chained (as opposed to fused)
float MACs, right?


Otherwise, LGTM.

Tejas.



[[ARM/AArch64][testsuite] 06/36] Add vmla and vmls tests.

2015-01-13 Thread Christophe Lyon

* gcc.target/aarch64/advsimd-intrinsics/vmlX.inc: New file.
* gcc.target/aarch64/advsimd-intrinsics/vmla.c: New file.
* gcc.target/aarch64/advsimd-intrinsics/vmls.c: New file.
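
vmla.c and vmls.c are thin wrappers around the shared vmlX.inc.  A
hypothetical outline of such a wrapper is shown below; the expected-result
arrays are elided, and the trailing #include of vmlX.inc is an assumption
based on how the other advsimd-intrinsics tests are structured:

/* Hypothetical wrapper outline (e.g. vmls.c); expected arrays elided.  */
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"

#define INSN_NAME vmls
#define TEST_MSG "VMLS"

/* VECT_VAR_DECL(expected, ...) arrays for each element type go here.  */

#include "vmlX.inc"   /* provides exec_vmls() and main() */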

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc
new file mode 100644
index 0000000..1c8f1be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmlX.inc
@@ -0,0 +1,110 @@
+#define FNNAME1(NAME) exec_ ## NAME
+#define FNNAME(NAME) FNNAME1(NAME)
+
+void FNNAME (INSN_NAME) (void)
+{
+#define DECL_VMLX(T, W, N) \
+  DECL_VARIABLE(vector1, T, W, N); \
+  DECL_VARIABLE(vector2, T, W, N); \
+  DECL_VARIABLE(vector3, T, W, N); \
+  DECL_VARIABLE(vector_res, T, W, N)
+
+  /* vector_res = vmla(vector1, vector2, vector3),
+     then store the result.  */
+#define TEST_VMLX1(INSN, Q, T1, T2, W, N)  \
+  VECT_VAR(vector_res, T1, W, N) =  \
+INSN##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
+ VECT_VAR(vector2, T1, W, N),  \
+ VECT_VAR(vector3, T1, W, N)); \
+  vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), \
+   VECT_VAR(vector_res, T1, W, N))
+
+#define TEST_VMLX(INSN, Q, T1, T2, W, N)   \
+  TEST_VMLX1(INSN, Q, T1, T2, W, N)
+
+  DECL_VMLX(int, 8, 8);
+  DECL_VMLX(int, 16, 4);
+  DECL_VMLX(int, 32, 2);
+  DECL_VMLX(uint, 8, 8);
+  DECL_VMLX(uint, 16, 4);
+  DECL_VMLX(uint, 32, 2);
+  DECL_VMLX(float, 32, 2);
+  DECL_VMLX(int, 8, 16);
+  DECL_VMLX(int, 16, 8);
+  DECL_VMLX(int, 32, 4);
+  DECL_VMLX(uint, 8, 16);
+  DECL_VMLX(uint, 16, 8);
+  DECL_VMLX(uint, 32, 4);
+  DECL_VMLX(float, 32, 4);
+
+  clean_results ();
+
+  VLOAD(vector1, buffer, , int, s, 8, 8);
+  VLOAD(vector1, buffer, , int, s, 16, 4);
+  VLOAD(vector1, buffer, , int, s, 32, 2);
+  VLOAD(vector1, buffer, , uint, u, 8, 8);
+  VLOAD(vector1, buffer, , uint, u, 16, 4);
+  VLOAD(vector1, buffer, , uint, u, 32, 2);
+  VLOAD(vector1, buffer, , float, f, 32, 2);
+  VLOAD(vector1, buffer, q, int, s, 8, 16);
+  VLOAD(vector1, buffer, q, int, s, 16, 8);
+  VLOAD(vector1, buffer, q, int, s, 32, 4);
+  VLOAD(vector1, buffer, q, uint, u, 8, 16);
+  VLOAD(vector1, buffer, q, uint, u, 16, 8);
+  VLOAD(vector1, buffer, q, uint, u, 32, 4);
+  VLOAD(vector1, buffer, q, float, f, 32, 4);
+
+  VDUP(vector2, , int, s, 8, 8, 0x11);
+  VDUP(vector2, , int, s, 16, 4, 0x22);
+  VDUP(vector2, , int, s, 32, 2, 0x33);
+  VDUP(vector2, , uint, u, 8, 8, 0x44);
+  VDUP(vector2, , uint, u, 16, 4, 0x55);
+  VDUP(vector2, , uint, u, 32, 2, 0x66);
+  VDUP(vector2, , float, f, 32, 2, 33.1f);
+  VDUP(vector2, q, int, s, 8, 16, 0x77);
+  VDUP(vector2, q, int, s, 16, 8, 0x88);
+  VDUP(vector2, q, int, s, 32, 4, 0x99);
+  VDUP(vector2, q, uint, u, 8, 16, 0xAA);
+  VDUP(vector2, q, uint, u, 16, 8, 0xBB);
+  VDUP(vector2, q, uint, u, 32, 4, 0xCC);
+  VDUP(vector2, q, float, f, 32, 4, 99.2f);
+
+  VDUP(vector3, , int, s, 8, 8, 0xFF);
+  VDUP(vector3, , int, s, 16, 4, 0xEE);
+  VDUP(vector3, , int, s, 32, 2, 0xDD);
+  VDUP(vector3, , uint, u, 8, 8, 0xCC);
+  VDUP(vector3, , uint, u, 16, 4, 0xBB);
+  VDUP(vector3, , uint, u, 32, 2, 0xAA);
+  VDUP(vector3, , float, f, 32, 2, 10.23f);
+  VDUP(vector3, q, int, s, 8, 16, 0x99);
+  VDUP(vector3, q, int, s, 16, 8, 0x88);
+  VDUP(vector3, q, int, s, 32, 4, 0x77);
+  VDUP(vector3, q, uint, u, 8, 16, 0x66);
+  VDUP(vector3, q, uint, u, 16, 8, 0x55);
+  VDUP(vector3, q, uint, u, 32, 4, 0x44);
+  VDUP(vector3, q, float, f, 32, 4, 77.8f);
+
+  TEST_VMLX(INSN_NAME, , int, s, 8, 8);
+  TEST_VMLX(INSN_NAME, , int, s, 16, 4);
+  TEST_VMLX(INSN_NAME, , int, s, 32, 2);
+  TEST_VMLX(INSN_NAME, , uint, u, 8, 8);
+  TEST_VMLX(INSN_NAME, , uint, u, 16, 4);
+  TEST_VMLX(INSN_NAME, , uint, u, 32, 2);
+  TEST_VMLX(INSN_NAME, , float, f, 32, 2);
+  TEST_VMLX(INSN_NAME, q, int, s, 8, 16);
+  TEST_VMLX(INSN_NAME, q, int, s, 16, 8);
+  TEST_VMLX(INSN_NAME, q, int, s, 32, 4);
+  TEST_VMLX(INSN_NAME, q, uint, u, 8, 16);
+  TEST_VMLX(INSN_NAME, q, uint, u, 16, 8);
+  TEST_VMLX(INSN_NAME, q, uint, u, 32, 4);
+  TEST_VMLX(INSN_NAME, q, float, f, 32, 4);
+
+  CHECK_RESULTS (TEST_MSG, "");
+}
+
+int main (void)
+{
+  FNNAME (INSN_NAME) ();
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c
new file mode 100644
index 0000000..e3da60c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vmla.c
@@ -0,0 +1,50 @@
+#include <arm_neon.h>
+#include "arm-neon-ref.h"
+#include "compute-ref-data.h"
+
+#define INSN_NAME vmla
+#define TEST_MSG "VMLA"
+
+/* Expected results.  */
+VECT_VAR_DECL(expected,int,8,8) [] = { 0xdf, 0xe0, 0xe1, 0xe2,
+  0xe3, 0xe4, 0xe5, 0xe6 };
+VECT_VAR_DECL(exp