https://gcc.gnu.org/g:fc811192bb897b3244da0d99894a3909281e3b63
commit r17-850-gfc811192bb897b3244da0d99894a3909281e3b63 Author: oltolm <[email protected]> Date: Sun May 24 12:57:51 2026 +0200 testsuite, i386: add win64 AVX indirect alignment tests [PR54412] On x86_64-w64-mingw32, PR target/54412 is triggered when AVX and AVX512 values are passed or returned indirectly and GCC materializes under-aligned stack storage for them. Add run tests for the original by-value AVX cases, for isolated hidden sret allocation and callee by-reference parameter materialization, and for a dedicated aligned(64) AVX512 case. gcc/testsuite/ChangeLog: PR target/54412 * gcc.target/i386/pr54412-v4d-o0-aligned-locals.c: New test. * gcc.target/i386/pr54412-o2-by-value-cases.c: New test. * gcc.target/i386/pr54412-sret-no-args.c: New test. * gcc.target/i386/pr54412-callee-byref-param.c: New test. * gcc.target/i386/pr54412-avx512-aligned64.c: New test. Signed-off-by: oltolm <[email protected]> Signed-off-by: Jonathan Yong <[email protected]> Diff: --- .../gcc.target/i386/pr54412-avx512-aligned64.c | 29 ++++++ .../gcc.target/i386/pr54412-callee-byref-param.c | 27 ++++++ .../gcc.target/i386/pr54412-o2-by-value-cases.c | 104 +++++++++++++++++++++ .../gcc.target/i386/pr54412-sret-no-args.c | 36 +++++++ .../i386/pr54412-v4d-o0-aligned-locals.c | 27 ++++++ 5 files changed, 223 insertions(+) diff --git a/gcc/testsuite/gcc.target/i386/pr54412-avx512-aligned64.c b/gcc/testsuite/gcc.target/i386/pr54412-avx512-aligned64.c new file mode 100644 index 000000000000..c496f76e76e8 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54412-avx512-aligned64.c @@ -0,0 +1,29 @@ +/* PR target/54412 */ +/* { dg-do run { target { avx512f && { ! ia32 } } } } */ +/* { dg-skip-if "PR target/54412 exercises the Win64 ABI" { ! x86_64-*-mingw* } } */ +/* { dg-options "-O0 -mavx512f" } */ + +#include "avx512-check.h" + +typedef double v8d __attribute__ ((vector_size (64), aligned (64))); + +__attribute__ ((noinline)) static v8d +roundtrip_v8d (v8d x) +{ + return x; +} + +static void +test_512 (void) +{ + v8d x __attribute__ ((aligned (64))) + = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 }; + v8d expected __attribute__ ((aligned (64))) + = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 }; + v8d r __attribute__ ((aligned (64))); + + r = roundtrip_v8d (x); + + if (__builtin_memcmp (&r, &expected, sizeof (r)) != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr54412-callee-byref-param.c b/gcc/testsuite/gcc.target/i386/pr54412-callee-byref-param.c new file mode 100644 index 000000000000..1eb1164ff9b7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54412-callee-byref-param.c @@ -0,0 +1,27 @@ +/* PR target/54412 */ +/* { dg-do run { target { avx && { ! ia32 } } } } */ +/* { dg-skip-if "PR target/54412 exercises the Win64 ABI" { ! x86_64-*-mingw* } } */ +/* { dg-options "-O0 -mavx" } */ + +#include "avx-check.h" + +typedef float v8f __attribute__ ((vector_size (32), aligned (32))); + +static const v8f expected = { 1.0f, 2.0f, 3.0f, 4.0f, + 5.0f, 6.0f, 7.0f, 8.0f }; + +__attribute__ ((noinline)) static int +param_matches (v8f x) +{ + return __builtin_memcmp (&x, &expected, sizeof (x)) == 0; +} + +static void +avx_test (void) +{ + v8f x __attribute__ ((aligned (32))) + = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + + if (!param_matches (x)) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr54412-o2-by-value-cases.c b/gcc/testsuite/gcc.target/i386/pr54412-o2-by-value-cases.c new file mode 100644 index 000000000000..cdcca7b31f4c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54412-o2-by-value-cases.c @@ -0,0 +1,104 @@ +/* PR target/54412 */ +/* { dg-do run { target { avx && { ! ia32 } } } } */ +/* { dg-skip-if "PR target/54412 exercises the Win64 ABI" { ! x86_64-*-mingw* } } */ +/* { dg-options "-O2 -mavx -std=gnu99" } */ + +#include "avx-check.h" +#include <immintrin.h> + +typedef double v4d __attribute__ ((vector_size (32), aligned (32))); +typedef char byte_vec __attribute__ ((vector_size (32))); + +struct avx_wrapper +{ + __m256 value; +}; + +#define PERM_LEFT_BVEC \ + (byte_vec) { 63, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, \ + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, \ + 29, 30 } + +__attribute__ ((noinline, noclone)) static v4d +roundtrip_v4d (v4d x) +{ + return x; +} + +__attribute__ ((noinline)) static __m256 +pass_m256 (__m256 x) +{ + return x; +} + +__attribute__ ((noinline)) static struct avx_wrapper +pass_wrapper (struct avx_wrapper x) +{ + return x; +} + +__attribute__ ((noinline)) static byte_vec +permute (byte_vec x, byte_vec y) +{ + return __builtin_shuffle (x, y, PERM_LEFT_BVEC); +} + +static void +test_v4d_roundtrip (void) +{ + v4d x = (v4d) { 1.0, 2.0, 3.0, 4.0 }; + v4d r = roundtrip_v4d (x); + v4d expected = (v4d) { 1.0, 2.0, 3.0, 4.0 }; + + if (__builtin_memcmp (&r, &expected, sizeof (r)) != 0) + __builtin_abort (); +} + +static void +test_m256_and_wrapper_by_value (void) +{ + __m256 m = _mm256_set_ps (8.0f, 7.0f, 6.0f, 5.0f, + 4.0f, 3.0f, 2.0f, 1.0f); + __m256 r = pass_m256 (m); + struct avx_wrapper x; + struct avx_wrapper y; + + x.value = _mm256_set_ps (16.0f, 15.0f, 14.0f, 13.0f, + 12.0f, 11.0f, 10.0f, 9.0f); + y = pass_wrapper (x); + + if (__builtin_memcmp (&r, &m, sizeof (r)) != 0) + __builtin_abort (); + + if (__builtin_memcmp (&y, &x, sizeof (y)) != 0) + __builtin_abort (); +} + +static void +test_two_arg_bytevec_shuffle (void) +{ + byte_vec x = { 1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32 }; + byte_vec y = { 11, 12, 13, 14, 15, 16, 17, 18, + 19, 110, 111, 112, 113, 114, 115, 116, + 117, 118, 119, 120, 121, 122, 123, 124, + 125, 126, 127, 88, 89, 90, 91, 92 }; + byte_vec expected = { 92, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + byte_vec z = permute (x, y); + + if (__builtin_memcmp (&z, &expected, sizeof (z)) != 0) + __builtin_abort (); +} + +static void +avx_test (void) +{ + test_v4d_roundtrip (); + test_m256_and_wrapper_by_value (); + test_two_arg_bytevec_shuffle (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr54412-sret-no-args.c b/gcc/testsuite/gcc.target/i386/pr54412-sret-no-args.c new file mode 100644 index 000000000000..af23e6a99a0b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54412-sret-no-args.c @@ -0,0 +1,36 @@ +/* PR target/54412 */ +/* { dg-do run { target { avx && { ! ia32 } } } } */ +/* { dg-skip-if "PR target/54412 exercises the Win64 ABI" { ! x86_64-*-mingw* } } */ +/* { dg-options "-O2 -mavx -std=gnu99" } */ + +#include "avx-check.h" +#include <immintrin.h> + +struct avx_wrapper +{ + __m256 value; +}; + +__attribute__ ((noinline)) static struct avx_wrapper +make_wrapper (void) +{ + struct avx_wrapper x; + + x.value = _mm256_set_ps (8.0f, 7.0f, 6.0f, 5.0f, + 4.0f, 3.0f, 2.0f, 1.0f); + return x; +} + +static void +avx_test (void) +{ + struct avx_wrapper expected; + struct avx_wrapper r; + + expected.value = _mm256_set_ps (8.0f, 7.0f, 6.0f, 5.0f, + 4.0f, 3.0f, 2.0f, 1.0f); + r = make_wrapper (); + + if (__builtin_memcmp (&r, &expected, sizeof (r)) != 0) + __builtin_abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/pr54412-v4d-o0-aligned-locals.c b/gcc/testsuite/gcc.target/i386/pr54412-v4d-o0-aligned-locals.c new file mode 100644 index 000000000000..74675f53bcb5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr54412-v4d-o0-aligned-locals.c @@ -0,0 +1,27 @@ +/* PR target/54412 */ +/* { dg-do run { target { avx && { ! ia32 } } } } */ +/* { dg-skip-if "PR target/54412 exercises the Win64 ABI" { ! x86_64-*-mingw* } } */ +/* { dg-options "-O0 -mavx" } */ + +#include "avx-check.h" + +typedef double v4d __attribute__ ((vector_size (32))); + +__attribute__ ((noinline)) static v4d +f (v4d x) +{ + return x; +} + +static void +avx_test (void) +{ + v4d x __attribute__ ((aligned (32))) = { 1.0, 2.0, 3.0, 4.0 }; + v4d expected __attribute__ ((aligned (32))) = { 1.0, 2.0, 3.0, 4.0 }; + v4d r __attribute__ ((aligned (32))); + + r = f (x); + + if (__builtin_memcmp (&r, &expected, sizeof (r)) != 0) + __builtin_abort (); +}
