Hi!

The following testcases are miscompiled on little endian targets.  The bug
occurs when we are merging bitfield stores and all of the following hold:
there is a signed bitfield whose bitlen is a multiple of BITS_PER_UNIT but
not equal to the bitsize of the corresponding mode, the bitfield doesn't
start on a multiple of BITS_PER_UNIT, we store a negative constant to it,
and a store to a field overlapping the 7 bits above this bitfield is done
before the store to this bitfield.
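For concreteness, here is a minimal sketch of a layout meeting those
conditions (it just mirrors the struct from the testcases below; the bit
offsets in the comments assume little endian bitfield layout with
#pragma pack(1)):

#pragma pack(1)
struct S
{
  int b:18;   /* bits 0..17 */
  int c:1;    /* bit 18 */
  int d:24;   /* bits 19..42: signed, bitlen 24 is a multiple of
                 BITS_PER_UNIT but smaller than SImode's 32 bits, and
                 bitpos 19 is not byte aligned */
  int e:15;   /* bits 43..57: occupies the bits just above d */
  int f:14;   /* bits 58..71 */
};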
In particular, in the testcases we have a bitfield with bitpos 19 and
bitlen 24, and store the value -5 to it.  That means we use a byte_size of
5 (SImode byte size + 1 extra byte for shifting), and we end up with
0xfb, 0xff, 0xff, 0xff, 0x00 bytes in tmpbuf.  Next, we find out that there
is padding of 1 byte, so we decrement byte_size to 4 (again, the real byte
size of the non-shifted bitfield is 3, plus 1 extra byte for shifting), but
as bitlen is a multiple of BITS_PER_UNIT, we don't clear anything after
those 0xfb, 0xff, 0xff bytes.  And then shift_bytes_in_array just shifts
all bytes left and we end up with 0xd8, 0xff, 0xff, 0xff when we want
0xd8, 0xff, 0xff, 0x07.  Finally these bytes are ORed into the real buffer,
where we've previously cleared the bitrange (which is why having 0xff in
the last byte is harmful).  A standalone simulation of these steps can be
found after the ChangeLog below.

Now, if bitlen is not a multiple of BITS_PER_UNIT, we already clear that
extra byte plus some bits:

      else
        clear_bit_region (tmpbuf, bitlen,
                          byte_size * BITS_PER_UNIT - bitlen);

Here byte_size is still the actual byte size + 1, and thus, say, if instead
of bitlen 24 we had bitlen 23, we would have cleared the
0xfb, 0xff, 0xff, 0xff, 0x00 tmpbuf to 0xfb, 0xff, 0x7f, 0x00, 0x00.

For big endian, my understanding is that we also rely on
tmpbuf[byte_size - 1] being 0, but we should not suffer from this problem:
we clear tmpbuf first everywhere, and then
native_encode_expr (expr, tmpbuf, byte_size, 0) should really write just
byte_size - 1 bytes due to the mode (though I'd say it would be much
clearer if we called native_encode_expr (expr, tmpbuf, byte_size - 1, 0),
because that is what we want; ok to make this change?).  The padding
adjustment for big-endian is then tmpbuf += padding, so we move the start
of the buffer, and for obvious reasons don't want to access any bytes
before tmpbuf during shifting.

The following patch fixes that, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk (without or with the above native_encode_expr call
change)?  Other options would be to do tmpbuf[byte_size - 1] = '\0'; there
unconditionally (we rely on it everywhere, it is just that in the other
cases it should be already cleared), or to do such clearing inside of the
two shift_bytes_in_array* functions.

2017-02-28  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/79737
        * gimple-ssa-store-merging.c (encode_tree_to_bitpos): If bitlen
        is a multiple of BITS_PER_UNIT and !BYTES_BIG_ENDIAN, clear
        tmpbuf[byte_size - 1].  Formatting fix.
        (shift_bytes_in_array_right): Formatting fix.

        * gcc.c-torture/execute/pr79737-1.c: New test.
        * gcc.c-torture/execute/pr79737-2.c: New test.
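To make the byte walk-through above concrete, here is a standalone sketch
(my own illustration, not code from GCC) that simulates the little endian
case from the testcases, i.e. bitpos 19, bitlen 24, value -5.  The
shift_left helper is a hypothetical simplified stand-in for
shift_bytes_in_array; commenting out the tmpbuf[byte_size - 1] = '\0'; line
reproduces the wrong 0xd8, 0xff, 0xff, 0xff bytes instead of the desired
0xd8, 0xff, 0xff, 0x07:

#include <stdio.h>

#define BITS_PER_UNIT 8

/* Shift the SZ bytes in PTR left by AMNT (0 < AMNT < BITS_PER_UNIT)
   bits, carrying bits from each byte into the next higher one.  */
static void
shift_left (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  unsigned char carry_over = 0;
  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned char prev_carry_over = carry_over;
      carry_over = ptr[i] >> (BITS_PER_UNIT - amnt);
      ptr[i] = (ptr[i] << amnt) | prev_carry_over;
    }
}

int
main ()
{
  /* What native_encode_expr produces for the SImode -5, plus 1 extra
     byte for shifting: 0xfb, 0xff, 0xff, 0xff, 0x00.  */
  unsigned char tmpbuf[5] = { 0xfb, 0xff, 0xff, 0xff, 0x00 };
  unsigned int byte_size = 5;
  unsigned int padding = 1;   /* bitlen 24 needs only 3 + 1 bytes.  */
  byte_size -= padding;
  /* The fix: without this, tmpbuf[3] keeps the stale sign extension
     byte 0xff and the shift below turns it into 0xff, not 0x07.  */
  tmpbuf[byte_size - 1] = '\0';
  shift_left (tmpbuf, byte_size, 19 % BITS_PER_UNIT);
  /* Prints 0xd8, 0xff, 0xff, 0x07.  */
  for (unsigned int i = 0; i < byte_size; i++)
    printf ("0x%02x%s", tmpbuf[i], i == byte_size - 1 ? "\n" : ", ");
  return 0;
}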
--- gcc/gimple-ssa-store-merging.c.jj	2017-01-01 12:45:38.000000000 +0100
+++ gcc/gimple-ssa-store-merging.c	2017-02-28 10:34:05.848174576 +0100
@@ -253,9 +253,9 @@ shift_bytes_in_array_right (unsigned cha
       unsigned prev_carry_over = carry_over;
       carry_over = ptr[i] & carry_mask;
 
-        carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
-        ptr[i] >>= amnt;
-        ptr[i] |= prev_carry_over;
+      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
+      ptr[i] >>= amnt;
+      ptr[i] |= prev_carry_over;
     }
 }
 
@@ -352,8 +352,9 @@ encode_tree_to_bitpos (tree expr, unsign
 {
   unsigned int first_byte = bitpos / BITS_PER_UNIT;
   tree tmp_int = expr;
-  bool sub_byte_op_p = (bitlen % BITS_PER_UNIT) || (bitpos % BITS_PER_UNIT)
-                       || mode_for_size (bitlen, MODE_INT, 0) == BLKmode;
+  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
+                        || (bitpos % BITS_PER_UNIT)
+                        || mode_for_size (bitlen, MODE_INT, 0) == BLKmode);
 
   if (!sub_byte_op_p)
     return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
@@ -418,25 +419,27 @@ encode_tree_to_bitpos (tree expr, unsign
      contain a sign bit due to sign-extension).  */
   unsigned int padding
     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
-  if (padding != 0
-      || bitlen % BITS_PER_UNIT != 0)
+  /* On big-endian the padding is at the 'front' so just skip the initial
+     bytes.  */
+  if (BYTES_BIG_ENDIAN)
+    tmpbuf += padding;
+
+  byte_size -= padding;
+
+  if (bitlen % BITS_PER_UNIT != 0)
     {
-      /* On big-endian the padding is at the 'front' so just skip the initial
-         bytes.  */
       if (BYTES_BIG_ENDIAN)
-        tmpbuf += padding;
-
-      byte_size -= padding;
-      if (bitlen % BITS_PER_UNIT != 0)
-        {
-          if (BYTES_BIG_ENDIAN)
-            clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
-                                 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
-          else
-            clear_bit_region (tmpbuf, bitlen,
-                              byte_size * BITS_PER_UNIT - bitlen);
-        }
-    }
+        clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
+                             BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
+      else
+        clear_bit_region (tmpbuf, bitlen,
+                          byte_size * BITS_PER_UNIT - bitlen);
+    }
+  /* Left shifting relies on the last byte being clear if bitlen is
+     a multiple of BITS_PER_UNIT, which might not be clear if
+     there are padding bytes.  */
+  else if (!BYTES_BIG_ENDIAN)
+    tmpbuf[byte_size - 1] = '\0';
 
   /* Clear the bit region in PTR where the bits from TMPBUF will be
      inserted into.  */
--- gcc/testsuite/gcc.c-torture/execute/pr79737-1.c.jj	2017-02-28 10:36:13.678474604 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr79737-1.c	2017-02-28 10:36:00.861645051 +0100
@@ -0,0 +1,37 @@
+/* PR tree-optimization/79737 */
+
+#pragma pack(1)
+struct S
+{
+  int b:18;
+  int c:1;
+  int d:24;
+  int e:15;
+  int f:14;
+} i;
+int g, j, k;
+static struct S h;
+
+void
+foo ()
+{
+  for (j = 0; j < 6; j++)
+    k = 0;
+  for (; k < 3; k++)
+    {
+      struct S m = { 5, 0, -5, 9, 5 };
+      h = m;
+      if (g)
+        i = m;
+      h.e = 0;
+    }
+}
+
+int
+main ()
+{
+  foo ();
+  if (h.e != 0)
+    __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.c-torture/execute/pr79737-2.c.jj	2017-02-28 10:36:16.993430520 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr79737-2.c	2017-02-28 10:41:25.217325124 +0100
@@ -0,0 +1,41 @@
+/* PR tree-optimization/79737 */
+
+#pragma pack(1)
+struct S
+{
+  int b:18;
+  int c:1;
+  int d:24;
+  int e:15;
+  int f:14;
+} i, j;
+
+void
+foo ()
+{
+  i.e = 0;
+  i.b = 5;
+  i.c = 0;
+  i.d = -5;
+  i.f = 5;
+}
+
+void
+bar ()
+{
+  j.b = 5;
+  j.c = 0;
+  j.d = -5;
+  j.e = 0;
+  j.f = 5;
+}
+
+int
+main ()
+{
+  foo ();
+  bar ();
+  asm volatile ("" : : : "memory");
+  if (i.b != j.b || i.c != j.c || i.d != j.d || i.e != j.e || i.f != j.f)
+    __builtin_abort ();
+}

        Jakub