Hi! The following is the last spot in vn_reference_lookup_3 that didn't allow non-byte aligned offsets or sizes. To be precise, it did allow size that wasn't multiple of byte size and that caused a wrong-code issue on big-endian, as the pr93945.c testcase shows, so for GCC 9 we should add && multiple_p (ref->size, BITS_PER_UNIT) check instead. For the memset with SSA_NAME middle-argument, it still requires byte-aligned offset, as we'd otherwise need to rotate the value at runtime.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2020-02-27 Jakub Jelinek <ja...@redhat.com> PR tree-optimization/93582 PR tree-optimization/93945 * tree-ssa-sccvn.c (vn_reference_lookup_3): Handle memset with non-zero INTEGER_CST second argument and ref->offset or ref->size not a multiple of BITS_PER_UNIT. * gcc.dg/tree-ssa/pr93582-9.c: New test. * gcc.c-torture/execute/pr93945.c: New test. --- gcc/tree-ssa-sccvn.c.jj 2020-02-24 12:55:32.619143689 +0100 +++ gcc/tree-ssa-sccvn.c 2020-02-26 13:38:01.899937521 +0100 @@ -2386,7 +2386,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vn_reference_t vr = data->vr; gimple *def_stmt = SSA_NAME_DEF_STMT (vuse); tree base = ao_ref_base (ref); - HOST_WIDE_INT offseti, maxsizei; + HOST_WIDE_INT offseti = 0, maxsizei, sizei = 0; static vec<vn_reference_op_s> lhs_ops; ao_ref lhs_ref; bool lhs_ref_ok = false; @@ -2541,9 +2541,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree && (integer_zerop (gimple_call_arg (def_stmt, 1)) || ((TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST || (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8))) - && CHAR_BIT == 8 && BITS_PER_UNIT == 8 + && CHAR_BIT == 8 + && BITS_PER_UNIT == 8 + && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN && offset.is_constant (&offseti) - && offseti % BITS_PER_UNIT == 0)) + && ref->size.is_constant (&sizei) + && (offseti % BITS_PER_UNIT == 0 + || TREE_CODE (gimple_call_arg (def_stmt, 1)) == INTEGER_CST))) && poly_int_tree_p (gimple_call_arg (def_stmt, 2)) && (TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)) @@ -2604,7 +2608,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree else return (void *)-1; tree len = gimple_call_arg (def_stmt, 2); - HOST_WIDE_INT leni, offset2i, offseti; + HOST_WIDE_INT leni, offset2i; /* Sometimes the above trickery is smarter than alias analysis. Take advantage of that. */ if (!ranges_maybe_overlap_p (offset, maxsize, offset2, @@ -2618,7 +2622,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree tree val; if (integer_zerop (gimple_call_arg (def_stmt, 1))) val = build_zero_cst (vr->type); - else if (INTEGRAL_TYPE_P (vr->type) && known_eq (ref->size, 8)) + else if (INTEGRAL_TYPE_P (vr->type) + && known_eq (ref->size, 8) + && offseti % BITS_PER_UNIT == 0) { gimple_match_op res_op (gimple_match_cond::UNCOND, NOP_EXPR, vr->type, gimple_call_arg (def_stmt, 1)); @@ -2630,10 +2636,34 @@ vn_reference_lookup_3 (ao_ref *ref, tree } else { - unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)); + unsigned buflen = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (vr->type)) + 1; + if (INTEGRAL_TYPE_P (vr->type)) + buflen = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (vr->type)) + 1; unsigned char *buf = XALLOCAVEC (unsigned char, buflen); memset (buf, TREE_INT_CST_LOW (gimple_call_arg (def_stmt, 1)), buflen); + if (BYTES_BIG_ENDIAN) + { + unsigned int amnt + = (((unsigned HOST_WIDE_INT) offseti + sizei) + % BITS_PER_UNIT); + if (amnt) + { + shift_bytes_in_array_right (buf, buflen, + BITS_PER_UNIT - amnt); + buf++; + buflen--; + } + } + else if (offseti % BITS_PER_UNIT != 0) + { + unsigned int amnt + = BITS_PER_UNIT - ((unsigned HOST_WIDE_INT) offseti + % BITS_PER_UNIT); + shift_bytes_in_array_left (buf, buflen, amnt); + buf++; + buflen--; + } val = native_interpret_expr (vr->type, buf, buflen); if (!val) return (void *)-1; --- gcc/testsuite/gcc.dg/tree-ssa/pr93582-9.c.jj 2020-02-26 13:47:42.246393489 +0100 +++ gcc/testsuite/gcc.dg/tree-ssa/pr93582-9.c 2020-02-26 13:48:42.786502204 +0100 @@ -0,0 +1,23 @@ +/* PR tree-optimization/93582 */ +/* { dg-do compile { target int32 } } */ +/* { dg-options "-O2 -fdump-tree-fre1" } */ +/* { dg-final { scan-tree-dump "return 5364;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return 7838;" "fre1" { target be } } } */ +/* { dg-final { scan-tree-dump "return 1959;" "fre1" { target le } } } */ +/* { dg-final { scan-tree-dump "return 1268;" "fre1" { target be } } } */ + +union U { char a[8]; struct S { unsigned int b : 8, c : 13, d : 11; } e; } u; + +__attribute__((noipa)) int +foo (void) +{ + __builtin_memset (&u.a, 0xf4, sizeof (u.a)); + return u.e.c; +} + +__attribute__((noipa)) int +baz (void) +{ + __builtin_memset (&u.a, 0xf4, sizeof (u.a)); + return u.e.d; +} --- gcc/testsuite/gcc.c-torture/execute/pr93945.c.jj 2020-02-26 13:49:47.165554390 +0100 +++ gcc/testsuite/gcc.c-torture/execute/pr93945.c 2020-02-26 12:56:13.659868782 +0100 @@ -0,0 +1,45 @@ +/* PR tree-optimization/93945 */ + +union U { char a[8]; struct S { unsigned int b : 8, c : 13, d : 11; } e; } u; + +__attribute__((noipa)) int +foo (void) +{ + __builtin_memset (&u.a, 0xf4, sizeof (u.a)); + return u.e.c; +} + +__attribute__((noipa)) int +bar (void) +{ + asm volatile ("" : : "g" (&u) : "memory"); + return u.e.c; +} + +__attribute__((noipa)) int +baz (void) +{ + __builtin_memset (&u.a, 0xf4, sizeof (u.a)); + return u.e.d; +} + +__attribute__((noipa)) int +qux (void) +{ + asm volatile ("" : : "g" (&u) : "memory"); + return u.e.d; +} + +int +main () +{ + int a = foo (); + int b = bar (); + if (a != b) + __builtin_abort (); + a = baz (); + b = qux (); + if (a != b) + __builtin_abort (); + return 0; +} Jakub