On Tue, Mar 14, 2023 at 5:09 PM Jakub Jelinek <ja...@redhat.com> wrote: > > Hi! > > In my PR107627 change I've missed one important case, which causes > miscompilation of f4 and f6 in the following tests. > > Combine matches there *concatsidi3_3 define_insn_and_split (as with all > other f* functions in those tests), and RA ends up with: > (insn 11 10 17 2 (set (reg:DI 0 ax [89]) > (ior:DI (ashift:DI (zero_extend:DI (mem:SI (plus:SI (mult:SI (reg:SI > 0 ax [94]) > (const_int 4 [0x4])) > (symbol_ref:SI ("arr") [flags 0x2] <var_decl > 0x7f4e7fe4ccf0 arr>)) [1 arr[ax_6(D)]+0 S4 A32])) > (const_int 32 [0x20])) > (zero_extend:DI (reg:SI 1 dx [95])))) "pr109109-6.c":24:49 681 > {*concatsidi3_3} > (nil)) > split_double_concat turned that into: > movl arr(,%eax,4), %edx > movl %edx, %eax > which is incorrect, because the first instruction overwrites the input > %edx value that should be put into output %eax; the two insns can't be > swapped because the MEM's address uses %eax. > > The following patch fixes that case to emit > movl arr(,%eax,4), %eax > xchgl %edx, %eax > instead. > > Bootstrap/regtest on x86_64-linux and i686-linux pending, ok for trunk > if it passes on both? > > 2023-03-14 Jakub Jelinek <ja...@redhat.com> > > PR target/109109 > * config/i386/i386-expand.cc (split_double_concat): Fix splitting > when lo is equal to dhi and hi is a MEM which uses dlo register. > > * gcc.target/i386/pr109109-1.c: New test. > * gcc.target/i386/pr109109-2.c: New test.
OK. Thanks, Uros. > > --- gcc/config/i386/i386-expand.cc.jj 2023-02-18 12:39:58.334768946 +0100 > +++ gcc/config/i386/i386-expand.cc 2023-03-14 15:07:38.672919652 +0100 > @@ -197,9 +197,20 @@ split_double_concat (machine_mode mode, > { > /* In this case, code below would first emit_move_insn (dlo, lo) > and then emit_move_insn (dhi, hi). But the former would > - invalidate hi's address. Load into dhi first. */ > - emit_move_insn (dhi, hi); > - hi = dhi; > + invalidate hi's address. */ > + if (rtx_equal_p (dhi, lo)) > + { > + /* We can't load into dhi first, so load into dlo > + first and we'll swap. */ > + emit_move_insn (dlo, hi); > + hi = dlo; > + } > + else > + { > + /* Load into dhi first. */ > + emit_move_insn (dhi, hi); > + hi = dhi; > + } > } > if (!rtx_equal_p (dlo, hi)) > { > --- gcc/testsuite/gcc.target/i386/pr109109-1.c.jj 2023-03-14 > 15:51:35.104926863 +0100 > +++ gcc/testsuite/gcc.target/i386/pr109109-1.c 2023-03-14 15:51:16.715191961 > +0100 > @@ -0,0 +1,139 @@ > +/* PR target/109109 */ > +/* { dg-do run { target ia32 } } */ > +/* { dg-options "-O2" } */ > + > +unsigned int arr[64]; > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f1 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) arr[ax]) << 32) | ax; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f2 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) arr[dx]) << 32) | ax; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f3 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) ((unsigned int *) (((char *) arr) + > ax))[dx]) << 32) | ax; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f4 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) arr[ax]) << 32) | dx; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f5 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) arr[dx]) << 32) | dx; > +} 
> + > +__attribute__((noipa, regparm (2))) unsigned long long > +f6 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) ((unsigned int *) (((char *) arr) + > ax))[dx]) << 32) | dx; > +} > + > +__attribute__((noipa, regparm (3))) unsigned long long > +f7 (unsigned int ax, unsigned int dx, unsigned int cx) > +{ > + return (((unsigned long long) arr[ax]) << 32) | cx; > +} > + > +__attribute__((noipa, regparm (3))) unsigned long long > +f8 (unsigned int ax, unsigned int dx, unsigned int cx) > +{ > + return (((unsigned long long) arr[dx]) << 32) | cx; > +} > + > +__attribute__((noipa, regparm (3))) unsigned long long > +f9 (unsigned int ax, unsigned int dx, unsigned int cx) > +{ > + return (((unsigned long long) ((unsigned int *) (((char *) arr) + > ax))[dx]) << 32) | cx; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f10 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) ax) << 32) | arr[ax]; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f11 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) ax) << 32) | arr[dx]; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f12 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) ax) << 32) | ((unsigned int *) (((char *) > arr) + ax))[dx]; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f13 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) dx) << 32) | arr[ax]; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f14 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) dx) << 32) | arr[dx]; > +} > + > +__attribute__((noipa, regparm (2))) unsigned long long > +f15 (unsigned int ax, unsigned int dx) > +{ > + return (((unsigned long long) dx) << 32) | ((unsigned int *) (((char *) > arr) + ax))[dx]; > +} > + > +__attribute__((noipa, regparm (3))) unsigned long long > +f16 (unsigned int ax, unsigned 
int dx, unsigned int cx) > +{ > + return (((unsigned long long) cx) << 32) | arr[ax]; > +} > + > +__attribute__((noipa, regparm (3))) unsigned long long > +f17 (unsigned int ax, unsigned int dx, unsigned int cx) > +{ > + return (((unsigned long long) cx) << 32) | arr[dx]; > +} > + > +__attribute__((noipa, regparm (3))) unsigned long long > +f18 (unsigned int ax, unsigned int dx, unsigned int cx) > +{ > + return (((unsigned long long) cx) << 32) | ((unsigned int *) (((char *) > arr) + ax))[dx]; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 64; i++) > + arr[i] = 64 + i; > +#define CHECK_EQ(x, y) do { if (x != y) __builtin_abort (); } while (0) > + CHECK_EQ (f1 (8, 9), 0x4800000008ULL); > + CHECK_EQ (f2 (8, 9), 0x4900000008ULL); > + CHECK_EQ (f3 (8, 9), 0x4b00000008ULL); > + CHECK_EQ (f4 (8, 9), 0x4800000009ULL); > + CHECK_EQ (f5 (8, 9), 0x4900000009ULL); > + CHECK_EQ (f6 (8, 9), 0x4b00000009ULL); > + CHECK_EQ (f7 (8, 9, 10), 0x480000000aULL); > + CHECK_EQ (f8 (8, 9, 10), 0x490000000aULL); > + CHECK_EQ (f9 (8, 9, 10), 0x4b0000000aULL); > + CHECK_EQ (f10 (8, 9), 0x800000048ULL); > + CHECK_EQ (f11 (8, 9), 0x800000049ULL); > + CHECK_EQ (f12 (8, 9), 0x80000004bULL); > + CHECK_EQ (f13 (8, 9), 0x900000048ULL); > + CHECK_EQ (f14 (8, 9), 0x900000049ULL); > + CHECK_EQ (f15 (8, 9), 0x90000004bULL); > + CHECK_EQ (f16 (8, 9, 10), 0xa00000048ULL); > + CHECK_EQ (f17 (8, 9, 10), 0xa00000049ULL); > + CHECK_EQ (f18 (8, 9, 10), 0xa0000004bULL); > +} > --- gcc/testsuite/gcc.target/i386/pr109109-2.c.jj 2023-03-14 > 15:53:08.619578782 +0100 > +++ gcc/testsuite/gcc.target/i386/pr109109-2.c 2023-03-14 16:05:22.675995934 > +0100 > @@ -0,0 +1,175 @@ > +/* PR target/109109 */ > +/* { dg-do run { target lp64 } } */ > +/* { dg-options "-O2" } */ > + > +unsigned long arr[64]; > + > +__attribute__((noipa)) unsigned __int128 > +f1 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) arr[ax]) 
<< 64) | ax; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f2 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) arr[dx]) << 64) | ax; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f3 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) ((unsigned long *) (((char *) arr) + > ax))[dx]) << 64) | ax; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f4 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) arr[ax]) << 64) | dx; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f5 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) arr[dx]) << 64) | dx; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f6 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) ((unsigned long *) (((char *) arr) + > ax))[dx]) << 64) | dx; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f7 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) arr[ax]) << 64) | cx; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f8 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) arr[dx]) << 64) | cx; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f9 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) ((unsigned 
long *) (((char *) arr) + > ax))[dx]) << 64) | cx; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f10 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) ax) << 64) | arr[ax]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f11 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) ax) << 64) | arr[dx]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f12 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) ax) << 64) | ((unsigned long *) (((char *) > arr) + ax))[dx]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f13 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) dx) << 64) | arr[ax]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f14 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) dx) << 64) | arr[dx]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f15 (unsigned long di, unsigned long si, unsigned long dx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) dx) << 64) | ((unsigned long *) (((char *) > arr) + ax))[dx]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f16 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) cx) << 64) | arr[ax]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f17 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return 
(((unsigned __int128) cx) << 64) | arr[dx]; > +} > + > +__attribute__((noipa)) unsigned __int128 > +f18 (unsigned long di, unsigned long si, unsigned long dx, unsigned long cx) > +{ > + unsigned long ax; > + asm ("" : "=a" (ax) : "0" (di)); > + return (((unsigned __int128) cx) << 64) | ((unsigned long *) (((char *) > arr) + ax))[dx]; > +} > + > +int > +main () > +{ > + for (int i = 0; i < 64; i++) > + arr[i] = 64 + i; > +#define CHECK_EQ(x, y1, y2) do { unsigned __int128 y = y1; y <<= 64; y += > y2; if (x != y) __builtin_abort (); } while (0) > + CHECK_EQ (f1 (8, 0, 9), 0x48, 0x8); > + CHECK_EQ (f2 (8, 0, 9), 0x49, 0x8); > + CHECK_EQ (f3 (8, 0, 9), 0x4a, 0x8); > + CHECK_EQ (f4 (8, 0, 9), 0x48, 0x9); > + CHECK_EQ (f5 (8, 0, 9), 0x49, 0x9); > + CHECK_EQ (f6 (8, 0, 9), 0x4a, 0x9); > + CHECK_EQ (f7 (8, 0, 9, 10), 0x48, 0xa); > + CHECK_EQ (f8 (8, 0, 9, 10), 0x49, 0xa); > + CHECK_EQ (f9 (8, 0, 9, 10), 0x4a, 0xa); > + CHECK_EQ (f10 (8, 0, 9), 0x8, 0x48); > + CHECK_EQ (f11 (8, 0, 9), 0x8, 0x49); > + CHECK_EQ (f12 (8, 0, 9), 0x8, 0x4a); > + CHECK_EQ (f13 (8, 0, 9), 0x9, 0x48); > + CHECK_EQ (f14 (8, 0, 9), 0x9, 0x49); > + CHECK_EQ (f15 (8, 0, 9), 0x9, 0x4a); > + CHECK_EQ (f16 (8, 0, 9, 10), 0xa, 0x48); > + CHECK_EQ (f17 (8, 0, 9, 10), 0xa, 0x49); > + CHECK_EQ (f18 (8, 0, 9, 10), 0xa, 0x4a); > +} > > Jakub >