On Tue, Feb 3, 2009 at 8:41 AM, H.J. Lu <hjl.to...@gmail.com> wrote:
> On Sun, Feb 1, 2009 at 11:44 AM, H.J. Lu <hjl.to...@gmail.com> wrote:
>> Hi,
>>
>> We would like to update the x86-64 psABI to pass 32-byte aggregates
>> with a single __m256 field in AVX registers instead of in memory.
>> However, finding the proper wording seems tricky.
>> Here is what I got.  Any comments?
>>
>
> Here is the revised proposal. Any comments? I will post a gcc
> patch soon.
>

Here is the gcc patch with testcases. OK for trunk?

Thanks.

-- 
H.J.
gcc/

2009-02-03  H.J. Lu  <hongjiu...@intel.com>

        * config/i386/i386.c (x86_64_reg_class): Remove X86_64_AVX_CLASS.
        (x86_64_reg_class_name): Removed.
        (classify_argument): Return 0 if bytes > 32.  Return 0 if the
        first class isn't X86_64_SSE_CLASS or any of the others isn't
        X86_64_SSEUP_CLASS when size > 16 bytes.  Don't turn
        X86_64_SSEUP_CLASS into X86_64_SSE_CLASS if the preceding one
        is X86_64_SSEUP_CLASS.  Set AVX modes to 1 X86_64_SSE_CLASS
        and 3 X86_64_SSEUP_CLASS.
        (examine_argument): Remove X86_64_AVX_CLASS.
        (construct_container): Remove X86_64_AVX_CLASS.  Handle 4
        registers with 1 X86_64_SSE_CLASS and 3 X86_64_SSEUP_CLASS.

gcc/testsuite/

2009-02-04  H.J. Lu  <hongjiu...@intel.com>

        * gcc.target/x86_64/abi/avx/abi-avx.exp: New.
        * gcc.target/x86_64/abi/avx/args.h: Likewise.
        * gcc.target/x86_64/abi/avx/asm-support.S: Likewise.
        * gcc.target/x86_64/abi/avx/avx-check.h: Likewise.
        * gcc.target/x86_64/abi/avx/test_m256_returning.c: Likewise.
        * gcc.target/x86_64/abi/avx/test_passing_m256.c: Likewise.
        * gcc.target/x86_64/abi/avx/test_passing_structs.c: Likewise.
        * gcc.target/x86_64/abi/avx/test_passing_unions.c: Likewise.

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c      (revision 5119)
+++ gcc/config/i386/i386.c      (revision 5120)
@@ -1773,7 +1773,6 @@ enum x86_64_reg_class
     X86_64_NO_CLASS,
     X86_64_INTEGER_CLASS,
     X86_64_INTEGERSI_CLASS,
-    X86_64_AVX_CLASS,
     X86_64_SSE_CLASS,
     X86_64_SSESF_CLASS,
     X86_64_SSEDF_CLASS,
@@ -1783,11 +1782,6 @@ enum x86_64_reg_class
     X86_64_COMPLEX_X87_CLASS,
     X86_64_MEMORY_CLASS
   };
-static const char * const x86_64_reg_class_name[] =
-{
-  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
-  "sseup", "x87", "x87up", "cplx87", "no"
-};
 
 #define MAX_CLASSES 4
 
@@ -4863,8 +4857,8 @@ classify_argument (enum machine_mode mod
       tree field;
       enum x86_64_reg_class subclasses[MAX_CLASSES];
 
-      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
-      if (bytes > 16)
+      /* On x86-64 we pass structures larger than 32 bytes on the stack.  */
+      if (bytes > 32)
        return 0;
 
       for (i = 0; i < words; i++)
@@ -4974,6 +4968,20 @@ classify_argument (enum machine_mode mod
          gcc_unreachable ();
        }
 
+      if (words > 2)
+       {
+         /* When size > 16 bytes, if the first one isn't
+            X86_64_SSE_CLASS or any other ones aren't
+            X86_64_SSEUP_CLASS, everything should be passed in
+            memory.  */
+         if (classes[0] != X86_64_SSE_CLASS)
+             return 0;
+
+         for (i = 1; i < words; i++)
+           if (classes[i] != X86_64_SSEUP_CLASS)
+             return 0;
+       }
+
       /* Final merger cleanup.  */
       for (i = 0; i < words; i++)
        {
@@ -4983,10 +4991,15 @@ classify_argument (enum machine_mode mod
            return 0;
 
          /* The X86_64_SSEUP_CLASS should be always preceded by
-            X86_64_SSE_CLASS.  */
+            X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
-             && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
-           classes[i] = X86_64_SSE_CLASS;
+             && classes[i - 1] != X86_64_SSE_CLASS
+             && classes[i - 1] != X86_64_SSEUP_CLASS)
+           {
+             /* The first one should never be X86_64_SSEUP_CLASS.  */
+             gcc_assert (i != 0);
+             classes[i] = X86_64_SSE_CLASS;
+           }
 
          /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
          if (classes[i] == X86_64_X87UP_CLASS
@@ -5107,8 +5120,11 @@ classify_argument (enum machine_mode mod
     case V16HImode:
     case V4DFmode:
     case V4DImode:
-      classes[0] = X86_64_AVX_CLASS;
-      return 1;
+      classes[0] = X86_64_SSE_CLASS;
+      classes[1] = X86_64_SSEUP_CLASS;
+      classes[2] = X86_64_SSEUP_CLASS;
+      classes[3] = X86_64_SSEUP_CLASS;
+      return 4;
     case V4SFmode:
     case V4SImode:
     case V16QImode:
@@ -5165,7 +5181,6 @@ examine_argument (enum machine_mode mode
       case X86_64_INTEGERSI_CLASS:
        (*int_nregs)++;
        break;
-      case X86_64_AVX_CLASS:
       case X86_64_SSE_CLASS:
       case X86_64_SSESF_CLASS:
       case X86_64_SSEDF_CLASS:
@@ -5264,7 +5279,6 @@ construct_container (enum machine_mode m
       case X86_64_INTEGER_CLASS:
       case X86_64_INTEGERSI_CLASS:
        return gen_rtx_REG (mode, intreg[0]);
-      case X86_64_AVX_CLASS:
       case X86_64_SSE_CLASS:
       case X86_64_SSESF_CLASS:
       case X86_64_SSEDF_CLASS:
@@ -5281,6 +5295,13 @@ construct_container (enum machine_mode m
   if (n == 2 && regclass[0] == X86_64_SSE_CLASS
       && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
     return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
+  if (n == 4
+      && regclass[0] == X86_64_SSE_CLASS
+      && regclass[1] == X86_64_SSEUP_CLASS
+      && regclass[2] == X86_64_SSEUP_CLASS
+      && regclass[3] == X86_64_SSEUP_CLASS
+      && mode != BLKmode)
+    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
 
   if (n == 2
       && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
@@ -5331,14 +5352,22 @@ construct_container (enum machine_mode m
            break;
          case X86_64_SSE_CLASS:
            if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
-             tmpmode = TImode;
+             {
+               if (regclass[i + 2] == X86_64_SSEUP_CLASS
+                   || regclass[i + 3] == X86_64_SSEUP_CLASS)
+                 tmpmode = OImode;
+               else
+                 tmpmode = TImode;
+             }
            else
              tmpmode = DImode;
            exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
                                               gen_rtx_REG (tmpmode,
                                                            SSE_REGNO (sse_regno)),
                                               GEN_INT (i*8));
-           if (tmpmode == TImode)
+           if (tmpmode == OImode)
+             i += 3;
+           else if (tmpmode == TImode)
              i++;
            sse_regno++;
            break;

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/avx-check.h
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/avx-check.h (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/avx-check.h (revision 5173)
@@ -0,0 +1,28 @@
+#include <stdlib.h>
+#include "cpuid.h"
+
+static void avx_test (void);
+
+int
+main ()
+{
+  unsigned int eax, ebx, ecx, edx;
+ 
+  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
+    return 0;
+
+  /* Run AVX test only if host has AVX support.  */
+  if (ecx & bit_AVX)
+    {
+      avx_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+    }
+#ifdef DEBUG
+  else
+    printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/avx-check.h
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_unions.c
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_unions.c       (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_unions.c       (revision 5173)
@@ -0,0 +1,143 @@
+#include "avx-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+union un1
+{
+  __m256 x;
+  float f;
+};
+
+union un2
+{
+  __m256 x;
+  double d;
+};
+
+union un3
+{
+  __m256 x;
+  __m128 v;
+};
+
+union un4
+{
+  __m256 x;
+  long double ld;
+};
+
+union un5
+{
+  __m256 x;
+  int i;
+};
+
+void
+check_union_passing1(union un1 u1 ATTRIBUTE_UNUSED,
+                    union un1 u2 ATTRIBUTE_UNUSED,
+                    union un1 u3 ATTRIBUTE_UNUSED,
+                    union un1 u4 ATTRIBUTE_UNUSED,
+                    union un1 u5 ATTRIBUTE_UNUSED,
+                    union un1 u6 ATTRIBUTE_UNUSED,
+                    union un1 u7 ATTRIBUTE_UNUSED,
+                    union un1 u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing2(union un2 u1 ATTRIBUTE_UNUSED,
+                    union un2 u2 ATTRIBUTE_UNUSED,
+                    union un2 u3 ATTRIBUTE_UNUSED,
+                    union un2 u4 ATTRIBUTE_UNUSED,
+                    union un2 u5 ATTRIBUTE_UNUSED,
+                    union un2 u6 ATTRIBUTE_UNUSED,
+                    union un2 u7 ATTRIBUTE_UNUSED,
+                    union un2 u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing3(union un3 u1 ATTRIBUTE_UNUSED,
+                    union un3 u2 ATTRIBUTE_UNUSED,
+                    union un3 u3 ATTRIBUTE_UNUSED,
+                    union un3 u4 ATTRIBUTE_UNUSED,
+                    union un3 u5 ATTRIBUTE_UNUSED,
+                    union un3 u6 ATTRIBUTE_UNUSED,
+                    union un3 u7 ATTRIBUTE_UNUSED,
+                    union un3 u8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_union_passing4(union un4 u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.ld == rsp+8);
+}
+
+void
+check_union_passing5(union un5 u ATTRIBUTE_UNUSED)
+{
+   /* Check the passing on the stack by comparing the address of the
+      stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&u.x == rsp+8);
+  assert ((unsigned long)&u.i == rsp+8);
+}
+
+#define check_union_passing1 WRAP_CALL(check_union_passing1)
+#define check_union_passing2 WRAP_CALL(check_union_passing2)
+#define check_union_passing3 WRAP_CALL(check_union_passing3)
+#define check_union_passing4 WRAP_CALL(check_union_passing4)
+#define check_union_passing5 WRAP_CALL(check_union_passing5)
+
+static void
+avx_test (void)
+{
+  union un1 u1[8];
+  union un2 u2[8];
+  union un3 u3[8];
+  union un4 u4;
+  union un5 u5;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    u1[i].x = (__m256){32+i, 0, i, 0, -i, 0, i - 12, i + 8};
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    fregs.ymm0._m256[i] = u1[i].x;
+  num_fregs = 8;
+  check_union_passing1(u1[0], u1[1], u1[2], u1[3],
+                      u1[4], u1[5], u1[6], u1[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u2[i].x = u1[i].x;
+      fregs.ymm0._m256[i] = u2[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing2(u2[0], u2[1], u2[2], u2[3],
+                      u2[4], u2[5], u2[6], u2[7]);
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    {
+      u3[i].x = u1[i].x;
+      fregs.ymm0._m256[i] = u3[i].x;
+    }
+  num_fregs = 8;
+  check_union_passing3(u3[0], u3[1], u3[2], u3[3],
+                      u3[4], u3[5], u3[6], u3[7]);
+
+  check_union_passing4(u4);
+  check_union_passing5(u5);
+}

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_unions.c
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/abi-avx.exp
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/abi-avx.exp (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/abi-avx.exp (revision 5173)
@@ -0,0 +1,50 @@
+# Copyright (C) 2009 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# The x86-64 AVX ABI testsuite needs one additional assembler file for most
+# testcases.  For simplicity we will just link it into each test.
+
+load_lib c-torture.exp
+load_lib target-supports.exp
+load_lib torture-options.exp
+
+if { (![istarget x86_64-*-*] && ![istarget i?86-*-*])
+     || ![is-effective-target lp64]
+     || ![is-effective-target avx] } then {
+  return
+}
+
+
+torture-init
+set-torture-options $C_TORTURE_OPTIONS
+set additional_flags "-W -Wall -mavx"
+
+foreach src [lsort [find $srcdir/$subdir test_*.c]] {
+    if {[runtest_file_p $runtests $src]} {
+       if { ([istarget *-*-darwin*]) } then {
+           # FIXME: Darwin isn't tested.
+           c-torture-execute [list $src \
+                                   $srcdir/$subdir/asm-support-darwin.s] \
+                                   $additional_flags
+       } else {
+           c-torture-execute [list $src \
+                                   $srcdir/$subdir/asm-support.S] \
+                                   $additional_flags
+       }
+    }
+}
+
+torture-finish

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/abi-avx.exp
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/args.h
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/args.h      (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/args.h      (revision 5173)
@@ -0,0 +1,180 @@
+#ifndef INCLUDED_ARGS_H
+#define INCLUDED_ARGS_H
+
+#include <immintrin.h>
+#include <string.h>
+
+/* Assertion macro.  */
+#define assert(test) if (!(test)) abort()
+
+#ifdef __GNUC__
+#define ATTRIBUTE_UNUSED __attribute__((__unused__))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+
+/* This defines the calling sequences for integers and floats.  */
+#define I0 rdi
+#define I1 rsi
+#define I2 rdx
+#define I3 rcx
+#define I4 r8
+#define I5 r9
+#define F0 ymm0
+#define F1 ymm1
+#define F2 ymm2
+#define F3 ymm3
+#define F4 ymm4
+#define F5 ymm5
+#define F6 ymm6
+#define F7 ymm7
+
+typedef union {
+  float _float[8];
+  double _double[4];
+  long _long[4];
+  int _int[8];
+  unsigned long _ulong[4];
+  __m64 _m64[4];
+  __m128 _m128[2];
+  __m256 _m256[1];
+} YMM_T;
+
+typedef union {
+  float _float;
+  double _double;
+  long double _ldouble;
+  unsigned long _ulong[2];
+} X87_T;
+extern void (*callthis)(void);
+extern unsigned long rax,rbx,rcx,rdx,rsi,rdi,rsp,rbp,r8,r9,r10,r11,r12,r13,r14,r15;
+YMM_T ymm_regs[16];
+X87_T x87_regs[8];
+extern volatile unsigned long volatile_var;
+extern void snapshot (void);
+extern void snapshot_ret (void);
+#define WRAP_CALL(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot)
+#define WRAP_RET(N) \
+  (callthis = (void (*)()) (N), (typeof (&N)) snapshot_ret)
+
+/* Clear all integer registers.  */
+#define clear_int_hardware_registers \
+  asm __volatile__ ("xor %%rax, %%rax\n\t" \
+                   "xor %%rbx, %%rbx\n\t" \
+                   "xor %%rcx, %%rcx\n\t" \
+                   "xor %%rdx, %%rdx\n\t" \
+                   "xor %%rsi, %%rsi\n\t" \
+                   "xor %%rdi, %%rdi\n\t" \
+                   "xor %%r8, %%r8\n\t" \
+                   "xor %%r9, %%r9\n\t" \
+                   "xor %%r10, %%r10\n\t" \
+                   "xor %%r11, %%r11\n\t" \
+                   "xor %%r12, %%r12\n\t" \
+                   "xor %%r13, %%r13\n\t" \
+                   "xor %%r14, %%r14\n\t" \
+                   "xor %%r15, %%r15\n\t" \
+                   ::: "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", \
+                   "r9", "r10", "r11", "r12", "r13", "r14", "r15");
+
+/* This is the list of registers available for passing arguments. Not all of
+   these are used or even really available.  */
+struct IntegerRegisters
+{
+  unsigned long rax, rbx, rcx, rdx, rsi, rdi, r8, r9, r10, r11, r12, r13, r14, r15;
+};
+struct FloatRegisters
+{
+  double mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;
+  long double st0, st1, st2, st3, st4, st5, st6, st7;
+  YMM_T ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9,
+        ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
+};
+
+/* Defined by each test_*.c file that includes this header.  */
+extern struct IntegerRegisters iregs;
+extern struct FloatRegisters fregs;
+extern unsigned int num_iregs, num_fregs;
+
+#define check_int_arguments do { \
+  assert (num_iregs <= 0 || iregs.I0 == I0); \
+  assert (num_iregs <= 1 || iregs.I1 == I1); \
+  assert (num_iregs <= 2 || iregs.I2 == I2); \
+  assert (num_iregs <= 3 || iregs.I3 == I3); \
+  assert (num_iregs <= 4 || iregs.I4 == I4); \
+  assert (num_iregs <= 5 || iregs.I5 == I5); \
+  } while (0)
+
+#define check_char_arguments check_int_arguments
+#define check_short_arguments check_int_arguments
+#define check_long_arguments check_int_arguments
+
+/* Clear register struct.  */
+#define clear_struct_registers \
+  rax = rbx = rcx = rdx = rdi = rsi = rbp = rsp \
+    = r8 = r9 = r10 = r11 = r12 = r13 = r14 = r15 = 0; \
+  memset (&iregs, 0, sizeof (iregs)); \
+  memset (&fregs, 0, sizeof (fregs)); \
+  memset (ymm_regs, 0, sizeof (ymm_regs)); \
+  memset (x87_regs, 0, sizeof (x87_regs));
+
+/* Clear both hardware and register structs for integers.  */
+#define clear_int_registers \
+  clear_struct_registers \
+  clear_int_hardware_registers
+
+/* TODO: Do the checking.  */
+#define check_f_arguments(T) do { \
+  assert (num_fregs <= 0 || fregs.ymm0._ ## T [0] == ymm_regs[0]._ ## T [0]); \
+  assert (num_fregs <= 1 || fregs.ymm1._ ## T [0] == ymm_regs[1]._ ## T [0]); \
+  assert (num_fregs <= 2 || fregs.ymm2._ ## T [0] == ymm_regs[2]._ ## T [0]); \
+  assert (num_fregs <= 3 || fregs.ymm3._ ## T [0] == ymm_regs[3]._ ## T [0]); \
+  assert (num_fregs <= 4 || fregs.ymm4._ ## T [0] == ymm_regs[4]._ ## T [0]); \
+  assert (num_fregs <= 5 || fregs.ymm5._ ## T [0] == ymm_regs[5]._ ## T [0]); \
+  assert (num_fregs <= 6 || fregs.ymm6._ ## T [0] == ymm_regs[6]._ ## T [0]); \
+  assert (num_fregs <= 7 || fregs.ymm7._ ## T [0] == ymm_regs[7]._ ## T [0]); \
+  } while (0)
+
+#define check_float_arguments check_f_arguments(float)
+#define check_double_arguments check_f_arguments(double)
+
+#define check_vector_arguments(T,O) do { \
+  assert (num_fregs <= 0 \
+         || memcmp (((char *) &fregs.ymm0) + (O), \
+                    &ymm_regs[0], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 1 \
+         || memcmp (((char *) &fregs.ymm1) + (O), \
+                    &ymm_regs[1], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 2 \
+         || memcmp (((char *) &fregs.ymm2) + (O), \
+                    &ymm_regs[2], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 3 \
+         || memcmp (((char *) &fregs.ymm3) + (O), \
+                    &ymm_regs[3], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 4 \
+         || memcmp (((char *) &fregs.ymm4) + (O), \
+                    &ymm_regs[4], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 5 \
+         || memcmp (((char *) &fregs.ymm5) + (O), \
+                    &ymm_regs[5], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 6 \
+         || memcmp (((char *) &fregs.ymm6) + (O), \
+                    &ymm_regs[6], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  assert (num_fregs <= 7 \
+         || memcmp (((char *) &fregs.ymm7) + (O), \
+                    &ymm_regs[7], \
+                    sizeof (__ ## T) - (O)) == 0); \
+  } while (0)
+
+#define check_m64_arguments check_vector_arguments(m64, 0)
+#define check_m128_arguments check_vector_arguments(m128, 0)
+#define check_m256_arguments check_vector_arguments(m256, 0)
+
+#endif /* INCLUDED_ARGS_H  */

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/args.h
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/test_m256_returning.c
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/test_m256_returning.c       (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/test_m256_returning.c       (revision 5173)
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include "avx-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+__m256
+fun_test_returning___m256 (void)
+{
+  volatile_var++;
+  return (__m256){73,0,0,0,0,0,0,0};
+}
+
+__m256 test_256;
+
+static void
+avx_test (void)
+{
+  unsigned failed = 0;
+  YMM_T ymmt1, ymmt2;
+
+  clear_struct_registers;
+  test_256 = (__m256){73,0,0,0,0,0,0,0};
+  ymmt1._m256[0] = test_256;
+  ymmt2._m256[0] = WRAP_RET (fun_test_returning___m256)();
+  if (memcmp (&ymmt1, &ymmt2, sizeof (ymmt2)) != 0)
+    printf ("fail m256\n"), failed++;
+  if (failed)
+    abort ();
+}

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/test_m256_returning.c
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S       (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S       (revision 5173)
@@ -0,0 +1,80 @@
+       .file   "snapshot.S"
+       .text
+       .p2align 4,,15
+.globl snapshot
+       .type   snapshot, @function
+snapshot:
+.LFB3:
+       movq    %rax, rax(%rip)
+       movq    %rbx, rbx(%rip)
+       movq    %rcx, rcx(%rip)
+       movq    %rdx, rdx(%rip)
+       movq    %rdi, rdi(%rip)
+       movq    %rsi, rsi(%rip)
+       movq    %rbp, rbp(%rip)
+       movq    %rsp, rsp(%rip)
+       movq    %r8, r8(%rip)
+       movq    %r9, r9(%rip)
+       movq    %r10, r10(%rip)
+       movq    %r11, r11(%rip)
+       movq    %r12, r12(%rip)
+       movq    %r13, r13(%rip)
+       movq    %r14, r14(%rip)
+       movq    %r15, r15(%rip)
+       vmovdqu %ymm0, ymm_regs+0(%rip)
+       vmovdqu %ymm1, ymm_regs+32(%rip)
+       vmovdqu %ymm2, ymm_regs+32*2(%rip)
+       vmovdqu %ymm3, ymm_regs+32*3(%rip)
+       vmovdqu %ymm4, ymm_regs+32*4(%rip)
+       vmovdqu %ymm5, ymm_regs+32*5(%rip)
+       vmovdqu %ymm6, ymm_regs+32*6(%rip)
+       vmovdqu %ymm7, ymm_regs+32*7(%rip)
+       vmovdqu %ymm8, ymm_regs+32*8(%rip)
+       vmovdqu %ymm9, ymm_regs+32*9(%rip)
+       vmovdqu %ymm10, ymm_regs+32*10(%rip)
+       vmovdqu %ymm11, ymm_regs+32*11(%rip)
+       vmovdqu %ymm12, ymm_regs+32*12(%rip)
+       vmovdqu %ymm13, ymm_regs+32*13(%rip)
+       vmovdqu %ymm14, ymm_regs+32*14(%rip)
+       vmovdqu %ymm15, ymm_regs+32*15(%rip)
+       jmp     *callthis(%rip)
+.LFE3:
+       .size   snapshot, .-snapshot
+
+       .p2align 4,,15
+.globl snapshot_ret
+       .type   snapshot_ret, @function
+snapshot_ret:
+       movq    %rdi, rdi(%rip)
+       call    *callthis(%rip)
+       movq    %rax, rax(%rip)
+       movq    %rdx, rdx(%rip)
+       vmovdqu %ymm0, ymm_regs+0(%rip)
+       vmovdqu %ymm1, ymm_regs+32(%rip)
+       fstpt   x87_regs(%rip)
+       fstpt   x87_regs+16(%rip)
+       fldt    x87_regs+16(%rip)
+       fldt    x87_regs(%rip)
+       ret
+       .size   snapshot_ret, .-snapshot_ret
+
+       .comm   callthis,8,8
+       .comm   rax,8,8
+       .comm   rbx,8,8
+       .comm   rcx,8,8
+       .comm   rdx,8,8
+       .comm   rsi,8,8
+       .comm   rdi,8,8
+       .comm   rsp,8,8
+       .comm   rbp,8,8
+       .comm   r8,8,8
+       .comm   r9,8,8
+       .comm   r10,8,8
+       .comm   r11,8,8
+       .comm   r12,8,8
+       .comm   r13,8,8
+       .comm   r14,8,8
+       .comm   r15,8,8
+       .comm   ymm_regs,512,32
+       .comm   x87_regs,128,32
+       .comm   volatile_var,8,8

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/asm-support.S
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_structs.c
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_structs.c      (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_structs.c      (revision 5173)
@@ -0,0 +1,61 @@
+#include "avx-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+struct m256_struct
+{
+  __m256 x;
+};
+
+struct m256_2_struct
+{
+  __m256 x1, x2;
+};
+
+/* Check that the struct is passed as the individual members in fregs.  */
+void
+check_struct_passing1 (struct m256_struct ms1 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms2 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms3 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms4 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms5 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms6 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms7 ATTRIBUTE_UNUSED,
+                      struct m256_struct ms8 ATTRIBUTE_UNUSED)
+{
+  check_m256_arguments;
+}
+
+void
+check_struct_passing2 (struct m256_2_struct ms ATTRIBUTE_UNUSED)
+{
+  /* Check the passing on the stack by comparing the address of the
+     stack elements to the expected place on the stack.  */
+  assert ((unsigned long)&ms.x1 == rsp+8);
+  assert ((unsigned long)&ms.x2 == rsp+40);
+}
+
+static void
+avx_test (void)
+{
+  struct m256_struct m256s [8];
+  struct m256_2_struct m256_2s = { 
+      { 48.394, 39.3, -397.9, 3484.9, -8.394, -93.3, 7.9, 84.94 },
+      { -8.394, -3.3, -39.9, 34.9, 7.9, 84.94, -48.394, 39.3 }
+  };
+  int i;
+
+  for (i = 0; i < 8; i++)
+    m256s[i].x = (__m256){32+i, 0, i, 0, -i, 0, i - 12, i + 8};
+
+  clear_struct_registers;
+  for (i = 0; i < 8; i++)
+    fregs.ymm0._m256[i] = m256s[i].x;
+  num_fregs = 8;
+  WRAP_CALL (check_struct_passing1)(m256s[0], m256s[1], m256s[2], m256s[3],
+                                   m256s[4], m256s[5], m256s[6], m256s[7]);
+  WRAP_CALL (check_struct_passing2)(m256_2s);
+}

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_structs.c
___________________________________________________________________
Added: svn:mergeinfo

Index: gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_m256.c
===================================================================
--- gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_m256.c (revision 0)
+++ gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_m256.c (revision 5173)
@@ -0,0 +1,168 @@
+#include <stdio.h>
+#include "avx-check.h"
+#include "args.h"
+
+struct IntegerRegisters iregs;
+struct FloatRegisters fregs;
+unsigned int num_iregs, num_fregs;
+
+/* This struct holds values for argument checking.  */
+struct 
+{
+  YMM_T i0, i1, i2, i3, i4, i5, i6, i7, i8, i9, i10, i11, i12, i13, i14, i15, i16, i17, i18, i19, i20, i21, i22, i23;
+} values;
+
+char *pass;
+int failed = 0;
+
+#undef assert
+#define assert(c) do { \
+  if (!(c)) {failed++; printf ("failed %s\n", pass); } \
+} while (0)
+
+#define compare(X1,X2,T) do { \
+  assert (memcmp (&X1, &X2, sizeof (T)) == 0); \
+} while (0)
+
+fun_check_passing_m256_8_values (__m256 i0 ATTRIBUTE_UNUSED, __m256 i1 ATTRIBUTE_UNUSED, __m256 i2 ATTRIBUTE_UNUSED, __m256 i3 ATTRIBUTE_UNUSED, __m256 i4 ATTRIBUTE_UNUSED, __m256 i5 ATTRIBUTE_UNUSED, __m256 i6 ATTRIBUTE_UNUSED, __m256 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m256);
+  compare (values.i1, i1, __m256);
+  compare (values.i2, i2, __m256);
+  compare (values.i3, i3, __m256);
+  compare (values.i4, i4, __m256);
+  compare (values.i5, i5, __m256);
+  compare (values.i6, i6, __m256);
+  compare (values.i7, i7, __m256);
+}
+
+void
+fun_check_passing_m256_8_regs (__m256 i0 ATTRIBUTE_UNUSED, __m256 i1 ATTRIBUTE_UNUSED, __m256 i2 ATTRIBUTE_UNUSED, __m256 i3 ATTRIBUTE_UNUSED, __m256 i4 ATTRIBUTE_UNUSED, __m256 i5 ATTRIBUTE_UNUSED, __m256 i6 ATTRIBUTE_UNUSED, __m256 i7 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m256_arguments;
+}
+
+void
+fun_check_passing_m256_20_values (__m256 i0 ATTRIBUTE_UNUSED, __m256 i1 ATTRIBUTE_UNUSED, __m256 i2 ATTRIBUTE_UNUSED, __m256 i3 ATTRIBUTE_UNUSED, __m256 i4 ATTRIBUTE_UNUSED, __m256 i5 ATTRIBUTE_UNUSED, __m256 i6 ATTRIBUTE_UNUSED, __m256 i7 ATTRIBUTE_UNUSED, __m256 i8 ATTRIBUTE_UNUSED, __m256 i9 ATTRIBUTE_UNUSED, __m256 i10 ATTRIBUTE_UNUSED, __m256 i11 ATTRIBUTE_UNUSED, __m256 i12 ATTRIBUTE_UNUSED, __m256 i13 ATTRIBUTE_UNUSED, __m256 i14 ATTRIBUTE_UNUSED, __m256 i15 ATTRIBUTE_UNUSED, __m256 i16 ATTRIBUTE_UNUSED, __m256 i17 ATTRIBUTE_UNUSED, __m256 i18 ATTRIBUTE_UNUSED, __m256 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check argument values.  */
+  compare (values.i0, i0, __m256);
+  compare (values.i1, i1, __m256);
+  compare (values.i2, i2, __m256);
+  compare (values.i3, i3, __m256);
+  compare (values.i4, i4, __m256);
+  compare (values.i5, i5, __m256);
+  compare (values.i6, i6, __m256);
+  compare (values.i7, i7, __m256);
+  compare (values.i8, i8, __m256);
+  compare (values.i9, i9, __m256);
+  compare (values.i10, i10, __m256);
+  compare (values.i11, i11, __m256);
+  compare (values.i12, i12, __m256);
+  compare (values.i13, i13, __m256);
+  compare (values.i14, i14, __m256);
+  compare (values.i15, i15, __m256);
+  compare (values.i16, i16, __m256);
+  compare (values.i17, i17, __m256);
+  compare (values.i18, i18, __m256);
+  compare (values.i19, i19, __m256);
+}
+
+void
+fun_check_passing_m256_20_regs (__m256 i0 ATTRIBUTE_UNUSED, __m256 i1 ATTRIBUTE_UNUSED, __m256 i2 ATTRIBUTE_UNUSED, __m256 i3 ATTRIBUTE_UNUSED, __m256 i4 ATTRIBUTE_UNUSED, __m256 i5 ATTRIBUTE_UNUSED, __m256 i6 ATTRIBUTE_UNUSED, __m256 i7 ATTRIBUTE_UNUSED, __m256 i8 ATTRIBUTE_UNUSED, __m256 i9 ATTRIBUTE_UNUSED, __m256 i10 ATTRIBUTE_UNUSED, __m256 i11 ATTRIBUTE_UNUSED, __m256 i12 ATTRIBUTE_UNUSED, __m256 i13 ATTRIBUTE_UNUSED, __m256 i14 ATTRIBUTE_UNUSED, __m256 i15 ATTRIBUTE_UNUSED, __m256 i16 ATTRIBUTE_UNUSED, __m256 i17 ATTRIBUTE_UNUSED, __m256 i18 ATTRIBUTE_UNUSED, __m256 i19 ATTRIBUTE_UNUSED)
+{
+  /* Check register contents.  */
+  check_m256_arguments;
+}
+
+
+#define def_check_passing8(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7); \
+  \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7);
+
+#define def_check_passing20(_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, _i18, _i19, _func1, _func2, TYPE) \
+  values.i0.TYPE[0] = _i0; \
+  values.i1.TYPE[0] = _i1; \
+  values.i2.TYPE[0] = _i2; \
+  values.i3.TYPE[0] = _i3; \
+  values.i4.TYPE[0] = _i4; \
+  values.i5.TYPE[0] = _i5; \
+  values.i6.TYPE[0] = _i6; \
+  values.i7.TYPE[0] = _i7; \
+  values.i8.TYPE[0] = _i8; \
+  values.i9.TYPE[0] = _i9; \
+  values.i10.TYPE[0] = _i10; \
+  values.i11.TYPE[0] = _i11; \
+  values.i12.TYPE[0] = _i12; \
+  values.i13.TYPE[0] = _i13; \
+  values.i14.TYPE[0] = _i14; \
+  values.i15.TYPE[0] = _i15; \
+  values.i16.TYPE[0] = _i16; \
+  values.i17.TYPE[0] = _i17; \
+  values.i18.TYPE[0] = _i18; \
+  values.i19.TYPE[0] = _i19; \
+  WRAP_CALL(_func1) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, _i18, _i19); \
+  \
+  clear_struct_registers; \
+  fregs.F0.TYPE[0] = _i0; \
+  fregs.F1.TYPE[0] = _i1; \
+  fregs.F2.TYPE[0] = _i2; \
+  fregs.F3.TYPE[0] = _i3; \
+  fregs.F4.TYPE[0] = _i4; \
+  fregs.F5.TYPE[0] = _i5; \
+  fregs.F6.TYPE[0] = _i6; \
+  fregs.F7.TYPE[0] = _i7; \
+  num_fregs = 8; \
+  WRAP_CALL(_func2) (_i0, _i1, _i2, _i3, _i4, _i5, _i6, _i7, _i8, _i9, _i10, _i11, _i12, _i13, _i14, _i15, _i16, _i17, _i18, _i19);
+
+void
+test_m256_on_stack ()
+{
+  __m256 x[8];
+  int i;
+  for (i = 0; i < 8; i++)
+    x[i] = (__m256){32+i, 0, 0, 0, 0, 0, 0, 0};
+  pass = "m256-8";
+  def_check_passing8(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], fun_check_passing_m256_8_values, fun_check_passing_m256_8_regs, _m256);
+}
+
+void
+test_too_many_m256 ()
+{
+  __m256 x[20];
+  int i;
+  for (i = 0; i < 20; i++)
+    x[i] = (__m256){32+i, 0, 0, 0, 0, 0, 0, 0};
+  pass = "m256-20";
+  def_check_passing20(x[0], x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9], x[10], x[11], x[12], x[13], x[14], x[15], x[16], x[17], x[18], x[19], fun_check_passing_m256_20_values, fun_check_passing_m256_20_regs, _m256);
+}
+
+static void
+avx_test (void)
+{
+  test_m256_on_stack ();
+  test_too_many_m256 ();
+  if (failed)
+    abort ();
+}

Property changes on: gcc/testsuite/gcc.target/x86_64/abi/avx/test_passing_m256.c
___________________________________________________________________
Added: svn:mergeinfo

Reply via email to