Hi! The following testcase is miscompiled on x86_64-linux. expand_compound_operation is called on (zero_extract:DI (mem/c:TI (reg/f:DI 16 argp) [3 i+0 S16 A128]) (const_int 16 [0x10]) (const_int 63 [0x3f])) so mode is DImode, inner_mode is TImode, pos 63, len 16 and modewidth 64.
A couple of lines above the problematic spot we have: if (modewidth >= pos + len) { tem = gen_lowpart (mode, XEXP (x, 0)); where the code uses gen_lowpart and then shifts left/right to extract the field in mode. But that guarding condition is false here (64 >= 63 + 16 does not hold), so we enter the next condition, where the code shifts XEXP (x, 0) right by pos and then applies an AND mask. It does so incorrectly though. Given that modewidth < pos + len, inner_mode must necessarily be larger than mode, and XEXP (x, 0) has inner_mode, but the code was calling simplify_shift_const with mode rather than inner_mode, which meant inconsistent arguments to simplify_shift_const and in this case turned the operand into a shift of a DImode MEM. The following patch fixes it by doing the shift properly in inner_mode and only after the shift taking the lowpart subreg and doing the masking in mode. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2021-04-08 Jakub Jelinek <ja...@redhat.com> PR rtl-optimization/99905 * combine.c (expand_compound_operation): If pos + len > modewidth, perform the right shift by pos in inner_mode and then convert to mode, instead of trying to simplify a shift of rtx with inner_mode by pos as if it was a shift in mode. * gcc.target/i386/pr99905.c: New test. --- gcc/combine.c.jj 2021-01-04 10:25:39.127230495 +0100 +++ gcc/combine.c 2021-04-07 11:33:13.442626682 +0200 @@ -7409,11 +7409,15 @@ expand_compound_operation (rtx x) mode, tem, modewidth - len); } else if (unsignedp && len < HOST_BITS_PER_WIDE_INT) - tem = simplify_and_const_int (NULL_RTX, mode, - simplify_shift_const (NULL_RTX, LSHIFTRT, - mode, XEXP (x, 0), - pos), - (HOST_WIDE_INT_1U << len) - 1); + { + tem = simplify_shift_const (NULL_RTX, LSHIFTRT, inner_mode, + XEXP (x, 0), pos); + tem = gen_lowpart (mode, tem); + if (!tem || GET_CODE (tem) == CLOBBER) + return x; + tem = simplify_and_const_int (NULL_RTX, mode, tem, + (HOST_WIDE_INT_1U << len) - 1); + } else /* Any other cases we can't handle. 
*/ return x; --- gcc/testsuite/gcc.target/i386/pr99905.c.jj 2021-04-07 11:42:40.954292535 +0200 +++ gcc/testsuite/gcc.target/i386/pr99905.c 2021-04-07 11:42:06.659675315 +0200 @@ -0,0 +1,33 @@ +/* PR rtl-optimization/99905 */ +/* { dg-do run { target int128 } } */ +/* { dg-options "-Os -mno-mmx -mno-sse" } */ + +typedef unsigned char U; +typedef unsigned char __attribute__((__vector_size__ (8))) A; +typedef unsigned char __attribute__((__vector_size__ (16))) B; +typedef unsigned char __attribute__((__vector_size__ (32))) C; +typedef unsigned int __attribute__((__vector_size__ (8))) D; +typedef unsigned long long __attribute__((__vector_size__ (8))) E; +typedef unsigned __int128 I; +typedef unsigned long long L; + +D gv; +I gi; + +L __attribute__((__noipa__)) +foo (int ua, int ub, int uc, int ud, E ue, I i) +{ + D d = (U) __builtin_bswap16 (i >> 63) + gv; + B y = ((union { C a; B b[2];}) (C){ }).b[0] + (B) gi; + A z = ((union { B a; A b[2];}) y).b[0] + (A) d; + return (L)z; +} + +int +main () +{ + L x = foo (0, 0, 0, 0, (E) { }, (I) 0x100 << 63); + if (x != 0x100000001) + __builtin_abort (); + return 0; +} Jakub