The recent subreg-CSE allows a part of PR42587 to be fixed with minimal surgery to the bswap pass - namely adding support for BIT_FIELD_REF.
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2016-05-13  Richard Biener  <rguent...@suse.de>

        PR tree-optimization/42587
        * tree-ssa-math-opts.c (perform_symbolic_merge): Handle BIT_FIELD_REF.
        (find_bswap_or_nop_1): Likewise.
        (bswap_replace): Likewise.

        * gcc.dg/optimize-bswapsi-4.c: New testcase.

Index: gcc/tree-ssa-math-opts.c
===================================================================
*** gcc/tree-ssa-math-opts.c (revision 236159)
--- gcc/tree-ssa-math-opts.c (working copy)
*************** perform_symbolic_merge (gimple *source_s
*** 2160,2168 ****
    gimple *source_stmt;
    struct symbolic_number *n_start;

    /* Sources are different, cancel bswap if they are not memory location with
       the same base (array, structure, ...).  */
!   if (gimple_assign_rhs1 (source_stmt1) != gimple_assign_rhs1 (source_stmt2))
      {
        uint64_t inc;
        HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
--- 2160,2175 ----
    gimple *source_stmt;
    struct symbolic_number *n_start;

+   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
+   if (TREE_CODE (rhs1) == BIT_FIELD_REF)
+     rhs1 = TREE_OPERAND (rhs1, 0);
+   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
+   if (TREE_CODE (rhs2) == BIT_FIELD_REF)
+     rhs2 = TREE_OPERAND (rhs2, 0);
+
    /* Sources are different, cancel bswap if they are not memory location with
       the same base (array, structure, ...).  */
!   if (rhs1 != rhs2)
      {
        uint64_t inc;
        HOST_WIDE_INT start_sub, end_sub, end1, end2, end;
*************** find_bswap_or_nop_1 (gimple *stmt, struc
*** 2285,2290 ****
--- 2292,2330 ----
        if (find_bswap_or_nop_load (stmt, rhs1, n))
          return stmt;

+       /* Handle BIT_FIELD_REF.  */
+       if (TREE_CODE (rhs1) == BIT_FIELD_REF
+           && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
+         {
+           unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
+           unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
+           if (bitpos % BITS_PER_UNIT == 0
+               && bitsize % BITS_PER_UNIT == 0
+               && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
+             {
+               /* Shift.  */
+               if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
+                 return NULL;
+
+               /* Mask.  */
+               uint64_t mask = 0;
+               uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
+               for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
+                    i++, tmp <<= BITS_PER_UNIT)
+                 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
+               n->n &= mask;
+
+               /* Convert.  */
+               n->type = TREE_TYPE (rhs1);
+               if (!n->base_addr)
+                 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
+
+               return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
+             }
+
+           return NULL;
+         }
+
        if (TREE_CODE (rhs1) != SSA_NAME)
          return NULL;

*************** bswap_replace (gimple *cur_stmt, gimple
*** 2683,2688 ****
--- 2723,2730 ----
              }
            src = val_tmp;
          }
+       else if (TREE_CODE (src) == BIT_FIELD_REF)
+         src = TREE_OPERAND (src, 0);

        if (n->range == 16)
          bswap_stats.found_16bit++;
Index: gcc/testsuite/gcc.dg/optimize-bswapsi-4.c
===================================================================
*** gcc/testsuite/gcc.dg/optimize-bswapsi-4.c (revision 0)
--- gcc/testsuite/gcc.dg/optimize-bswapsi-4.c (working copy)
***************
*** 0 ****
--- 1,28 ----
+ /* { dg-do compile } */
+ /* { dg-require-effective-target bswap32 } */
+ /* { dg-options "-O2 -fdump-tree-bswap" } */
+ /* { dg-additional-options "-march=z900" { target s390-*-* } } */
+
+ typedef unsigned char u8;
+ typedef unsigned int u32;
+ union __anonunion
+ {
+   u32 value;
+   u8 bytes[4];
+ };
+
+ u32
+ acpi_ut_dword_byte_swap (u32 value)
+ {
+   union __anonunion in;
+   in.value = value;
+ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+   return ((in.bytes[0] << 24) | (in.bytes[1] << 16)
+           | (in.bytes[2] << 8) | in.bytes[3]);
+ #else
+   return ((in.bytes[3] << 24) | (in.bytes[2] << 16)
+           | (in.bytes[1] << 8) | in.bytes[0]);
+ #endif
+ }
+
+ /* { dg-final { scan-tree-dump "32 bit bswap implementation found at" "bswap" } } */