On Fri, 7 Jun 2019 at 18:26, Richard Sandiford <richard.sandif...@arm.com> wrote: > > Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> writes: > > On Thu, 6 Jun 2019 at 16:54, Richard Sandiford > > <richard.sandif...@arm.com> wrote: > >> > >> Szabolcs Nagy <szabolcs.n...@arm.com> writes: > >> > On 03/06/2019 08:26, Prathamesh Kulkarni wrote: > >> >> +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c > >> >> @@ -0,0 +1,32 @@ > >> >> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ > >> >> +/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 > >> >> --save-temps" } */ > >> >> + > >> >> +/* Case 5.2: Interleaved elements and constants. */ > >> >> + > >> >> +#include <stdint.h> > >> >> + > >> >> +typedef int32_t vnx4si __attribute__((vector_size (32))); > >> >> + > >> >> +__attribute__((noipa)) > >> >> +vnx4si foo(int a, int b, int c, int d) > >> >> +{ > >> >> + return (vnx4si) { a, 1, b, 2, c, 3, d, 4 }; > >> >> +} > >> >> + > >> >> +/* > >> >> +foo: > >> >> +.LFB0: > >> >> + .cfi_startproc > >> >> + ptrue p0.s, vl8 > >> >> + mov z0.s, w3 > >> >> + adrp x3, .LANCHOR0 > >> >> + insr z0.s, w2 > >> >> + add x3, x3, :lo12:.LANCHOR0 > >> >> + insr z0.s, w1 > >> >> + ld1w z1.s, p0/z, [x3] > >> >> + insr z0.s, w0 > >> >> + zip1 z0.s, z0.s, z1.s > >> >> + ret > >> >> +*/ > >> >> + > >> >> +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), > >> >> w3\n\tadrp\t(x[0-9]+), \.LANCHOR0\n\tinsr\t\1, w2\n\tadd\t\2, \2, > >> >> :lo12:\.LANCHOR0\n\tinsr\t\1, w1\n\tld1w\t(z[0-9]+\.s), p[0-9]+/z, > >> >> \[\2\]\n\tinsr\t\1, w0\n\tzip1\t\1, \1, \3} } } */ > >> > > >> > this fails with tiny model when i'm testing aarch64-none-elf > >> > > >> > $ make check-c > >> > 'RUNTESTFLAGS=--target_board=aarch64-elf-qemu{-mcmodel=tiny} > >> > aarch64-sve.exp=init_8.c' > >> > ... 
> >> > FAIL: gcc.target/aarch64/sve/init_8.c -march=armv8.2-a+sve > >> > scan-assembler \\tmov\\t(z[0-9]+\\.s), w3\\n\\tadrp\\t(x[0-9]+), > >> > \\.LANCHOR0\\n\\tinsr\\t\\1, w2\\n\\tadd\\t\\2, \\2, > >> > :lo12:\\.LANCHOR0\\n\\tinsr\\t\\1, w1\\n\\tld1w\\t(z[0-9]+\\.s), > >> > p[0-9]+/z, > >> > \\[\\2\\]\\n\\tinsr\\t\\1, w0\\n\\tzip1\\t\\1, \\1, \\3 > >> > > >> > i think you need conditional scan asm for { target aarch64_small } > >> > and { target aarch64_tiny } or just skip the test for tiny, > >> > >> Maybe we should remove the address calculation and replace the ld1w > >> address with \[[^]]*\]. All that really matters for this test is that > >> the vector is loaded from memory. > >> > >> > but even then matching exact register name and instruction scheduling > >> > seems fragile. > >> > >> The only hard-coded register names are the parameters, which are > >> guaranteed by the ABI. Testing for those should be fine. > >> > >> The dg-options pass -fno-schedule-insns, but I guess they should > >> also pass -fno-schedule-insns2. Or maybe just use -O instead. > >> We can always revisit this later if even that isn't enough to make > >> the order stable. > > Thanks for the suggestions. Passing -fno-schedule-insns2 does seem to > > make the order stable. > > For init_1.c to init_4.c there were no intervening instructions, and > > for remaining tests, the patch passes -fno-schedule-insns2 > > and adjusts dg-scan accordingly. I verified the tests pass with > > -mcmodel=tiny. > > I think we should use consistent options for all the tests though. > So either we should add -fno-schedule-insns2 to all of them, > or we should switch to -O. TBH -O seems easier :-) (I checked > that all tests do still pass with -O.) 
> > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c > > b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c > > index 9d6e2dfc876..08437e5d8f1 100644 > > --- a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c > > @@ -1,5 +1,5 @@ > > /* { dg-do assemble { target aarch64_asm_sve_ok } } */ > > -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 > > --save-temps" } */ > > +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 > > -msve-vector-bits=256 --save-temps" } */ > > > > /* Case 5.4: Interleaved repeating elements and non-repeating elements. */ > > > > @@ -17,13 +17,14 @@ vnx4si foo(int a, int b, int c, int f) > > foo: > > .LFB0: > > .cfi_startproc > > - mov z0.s, w2 > > mov z1.s, w3 > > + mov z0.s, w2 > > insr z0.s, w1 > > - ptrue p0.s, vl8 > > insr z0.s, w0 > > zip1 z0.s, z0.s, z1.s > > + ptrue p0.s, vl8 > > + st1w z0.s, p0, [x8] > > ret > > */ > > > > -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), > > w3\n\tmov\t(z[0-9]+\.s), w2\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, > > w0\n\tzip1\t\2, \2, \1} } } */ > > +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), > > w3\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, > > \2, \1} } } */ > > You're reintroducing the st1w as part of the asms. We should either > do that for all the tests or leave it out. Oops, sorry about that. The attached patch removes the st1w from the expected asm and passes -O for all tests. OK to commit?
Thanks, Prathamesh > > Thanks, > Richard
2019-06-07 Prathamesh Kulkarni <prathamesh.kulka...@linaro.org> * gcc.target/aarch64/sve/init_1.c: Remove options -O2 -fno-schedule-insns and instead pass -O. Update assembly in comments. * gcc.target/aarch64/sve/init_2.c: Likewise. * gcc.target/aarch64/sve/init_3.c: Likewise. * gcc.target/aarch64/sve/init_4.c: Likewise. * gcc.target/aarch64/sve/init_5.c: Likewise and additionally adjust dg-scan. * gcc.target/aarch64/sve/init_6.c: Likewise. * gcc.target/aarch64/sve/init_7.c: Likewise. * gcc.target/aarch64/sve/init_8.c: Likewise. * gcc.target/aarch64/sve/init_9.c: Likewise. * gcc.target/aarch64/sve/init_10.c: Likewise. * gcc.target/aarch64/sve/init_11.c: Likewise. * gcc.target/aarch64/sve/init_12.c: Likewise. diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_1.c b/gcc/testsuite/gcc.target/aarch64/sve/init_1.c index 5c14b603f46..4f18088f3b0 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_1.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_1.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 1.1: Trailing constants with stepped sequence. */ @@ -17,10 +17,10 @@ vnx4si foo(int a, int b) foo: .LFB0: .cfi_startproc - ptrue p0.s, vl8 index z0.s, #1, #1 insr z0.s, w1 insr z0.s, w0 + ptrue p0.s, vl8 ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c index 9d6e2dfc876..1ee1db723e6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_10.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_10.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 5.4: Interleaved repeating elements and non-repeating elements. 
*/ @@ -17,13 +17,13 @@ vnx4si foo(int a, int b, int c, int f) foo: .LFB0: .cfi_startproc - mov z0.s, w2 mov z1.s, w3 + mov z0.s, w2 insr z0.s, w1 - ptrue p0.s, vl8 insr z0.s, w0 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tmov\t(z[0-9]+\.s), w2\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c index e50cd54ef13..0b3c4a51198 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_11.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_11.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 5.5: Interleaved repeating elements and trailing same elements. 
*/ @@ -18,11 +18,11 @@ foo: .LFB0: .cfi_startproc mov z0.s, w1 - mov z1.s, w2 insr z0.s, w0 - ptrue p0.s, vl8 + mov z1.s, w2 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w1\n\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w0\n.*\tzip1\t\1, \1, \2} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w1\n\tinsr\t\1, w0\n\tmov\t(z[0-9]+\.s), w2\n\tzip1\t\1, \1, \2} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c index 21d9e764360..2473a5ecb80 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_12.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_12.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 5.5: Interleaved repeating elements and trailing same elements. */ @@ -17,14 +17,14 @@ vnx4si foo(int a, int b, int f) foo: .LFB0: .cfi_startproc - mov z0.s, w0 mov z1.s, w2 + mov z0.s, w0 insr z0.s, w1 - ptrue p0.s, vl8 insr z0.s, w1 insr z0.s, w1 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tmov\t(z[0-9]+\.s), w0\n.*\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tzip1\t\2, \2, \1} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tmov\t(z[0-9]+\.s), w0\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tinsr\t\2, w1\n\tzip1\t\2, \2, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_2.c b/gcc/testsuite/gcc.target/aarch64/sve/init_2.c index a8b2a25b325..5b4ba105af2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_2.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_2.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 1.2: 
Trailing constants with repeating sequence. */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_3.c b/gcc/testsuite/gcc.target/aarch64/sve/init_3.c index 6b000b887ba..62f31b75efd 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_3.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_3.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 2.1: Leading constants with stepped sequence. */ @@ -17,11 +17,11 @@ vnx4si foo(int a, int b) foo: .LFB0: .cfi_startproc - ptrue p0.s, vl8 index z0.s, #6, #-1 insr z0.s, w0 insr z0.s, w1 rev z0.s, z0.s + ptrue p0.s, vl8 ret */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_4.c b/gcc/testsuite/gcc.target/aarch64/sve/init_4.c index 619274928e4..94484b1a4e6 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_4.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_4.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 2.2: Leading constants with stepped sequence. */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c index e7fbdd1a2aa..0a0e8ebd1fe 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_5.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_5.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 3: Trailing same element. 
*/ @@ -18,10 +18,10 @@ foo: .LFB0: .cfi_startproc mov z0.s, w2 - ptrue p0.s, vl8 insr z0.s, w1 insr z0.s, w0 + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n.*\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c index f6f3da5958d..10eca3a9001 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_6.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_6.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 3: Trailing same element. */ @@ -18,11 +18,11 @@ foo: .LFB0: .cfi_startproc mov z0.s, w2 - ptrue p0.s, vl8 insr z0.s, w1 insr z0.s, w0 rev z0.s, z0.s + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n.*\tinsr\t\1, w1\n\tinsr\t\1, w0\n\trev\t\1, \1} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0\n\trev\t\1, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c index e3104a35f13..d83fa9c08f2 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_7.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_7.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 5.1: All elements. 
*/ @@ -18,7 +18,6 @@ foo: .LFB0: .cfi_startproc mov z0.s, w7 - ptrue p0.s, vl8 insr z0.s, w6 insr z0.s, w5 insr z0.s, w4 @@ -26,7 +25,8 @@ foo: insr z0.s, w2 insr z0.s, w1 insr z0.s, w0 + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w7\n.*\tinsr\t\1, w6\n\tinsr\t\1, w5\n\tinsr\t\1, w4\n\tinsr\t\1, w3\n\tinsr\t\1, w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w7\n\tinsr\t\1, w6\n\tinsr\t\1, w5\n\tinsr\t\1, w4\n\tinsr\t\1, w3\n\tinsr\t\1, w2\n\tinsr\t\1, w1\n\tinsr\t\1, w0} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c index 7ff3e0849cc..73f7aba3df3 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_8.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_8.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 5.2: Interleaved elements and constants. 
*/ @@ -18,15 +18,15 @@ foo: .LFB0: .cfi_startproc ptrue p0.s, vl8 + adrp x4, .LANCHOR0 + add x4, x4, :lo12:.LANCHOR0 + ld1w z1.s, p0/z, [x4] mov z0.s, w3 - adrp x3, .LANCHOR0 insr z0.s, w2 - add x3, x3, :lo12:.LANCHOR0 insr z0.s, w1 - ld1w z1.s, p0/z, [x3] insr z0.s, w0 zip1 z0.s, z0.s, z1.s ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w3\n\tadrp\t(x[0-9]+), \.LANCHOR0\n\tinsr\t\1, w2\n\tadd\t\2, \2, :lo12:\.LANCHOR0\n\tinsr\t\1, w1\n\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[\2\]\n\tinsr\t\1, w0\n\tzip1\t\1, \1, \3} } } */ +/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-9]+/z, \[x[0-9]+\]\n\tmov\t(z[0-9]+\.s), w3\n\tinsr\t\2, w2\n\tinsr\t\2, w1\n\tinsr\t\2, w0\n\tzip1\t\2, \2, \1} } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c index 4d3c59b3bf8..668b4efdbc5 100644 --- a/gcc/testsuite/gcc.target/aarch64/sve/init_9.c +++ b/gcc/testsuite/gcc.target/aarch64/sve/init_9.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target aarch64_asm_sve_ok } } */ -/* { dg-options "-O2 -fno-schedule-insns -msve-vector-bits=256 --save-temps" } */ +/* { dg-options "-O -msve-vector-bits=256 --save-temps" } */ /* Case 5.3: Repeated elements. */ @@ -19,9 +19,9 @@ foo: .cfi_startproc mov z0.s, w0 mov z1.s, w1 - ptrue p0.s, vl8 zip1 z0.s, z0.s, z1.s + ptrue p0.s, vl8 ret */ -/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w0\n\tmov\t(z[0-9]+\.s), w1\n.*\tzip1\t\1, \1, \2} } } */ +/* { dg-final { scan-assembler {\tmov\t(z[0-9]+\.s), w0\n\tmov\t(z[0-9]+\.s), w1\n\tzip1\t\1, \1, \2} } } */