On Thu, May 28, 2015 at 02:42:22PM -0500, Segher Boessenkool wrote: > > That record form andi. is slower on many processors, > > Is it? On which processors?
That sort of info is in the IBM confidential processor book4 supplements. So I can't tell you. (I think it is completely crazy to keep information out of the hands of engineers, but my opinion doesn't count for much.) I'll tell you one of the reasons why they are slower, as any decent hardware engineer could probably figure this out themselves anyway. The record form instructions are cracked into two internal ops: the basic arithmetic/logic op and a compare. There's a limit to how much hardware can do in one clock cycle, or conversely, if you try to do more your clock must be slower. > > one of the aims of the wider patch I was working > > on was to remove patterns like rotlsi3_64, ashlsi3_64, lshrsi3_64 and > > ashrsi3_64. > > We will need such patterns no matter what; the compiler cannot magically > know what machine insns set the high bits of a 64-bit reg to zero. No, not by magic. I define EXTEND_OP in rs6000.h and use it in record_value_for_reg. Full patch follows. I see enough code gen improvements on powerpc64le to make this patch worth pursuing, things like "rlwinm 0,5,6,0,25; extsw 0,0" being converted to "rldic 0,5,6,52". No doubt this is due to being able to prove an int var doesn't have the sign bit set. Hmm, in fact the 52 says it is known to be only 6 bits before shifting. 
Index: combine.c =================================================================== --- combine.c (revision 223725) +++ combine.c (working copy) @@ -1739,7 +1739,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx if (set == 0 || GET_CODE (set) == CLOBBER) { - rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x)); + rsp->nonzero_bits = ~(unsigned HOST_WIDE_INT) 0; rsp->sign_bit_copies = 1; return; } @@ -1769,7 +1769,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx break; if (!link) { - rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x)); + rsp->nonzero_bits = ~(unsigned HOST_WIDE_INT) 0; rsp->sign_bit_copies = 1; return; } @@ -1788,7 +1788,7 @@ set_nonzero_bits_and_sign_copies (rtx x, const_rtx update_rsp_from_reg_equal (rsp, insn, set, x); else { - rsp->nonzero_bits = GET_MODE_MASK (GET_MODE (x)); + rsp->nonzero_bits = ~(unsigned HOST_WIDE_INT) 0; rsp->sign_bit_copies = 1; } } @@ -9832,10 +9832,16 @@ reg_nonzero_bits_for_combine (const_rtx x, machine REGNO (x))))) { unsigned HOST_WIDE_INT mask = rsp->last_set_nonzero_bits; + machine_mode mask_mode = rsp->last_set_mode; - if (GET_MODE_PRECISION (rsp->last_set_mode) < GET_MODE_PRECISION (mode)) + /* We possibly calculated last_set_nonzero_bits in a wider mode. */ + if (GET_MODE_CLASS (mask_mode) == MODE_INT + && GET_MODE_PRECISION (mask_mode) < HOST_BITS_PER_WIDE_INT) + mask_mode = nonzero_bits_mode; + + if (GET_MODE_PRECISION (mask_mode) < GET_MODE_PRECISION (mode)) /* We don't know anything about the upper bits. */ - mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (rsp->last_set_mode); + mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (mask_mode); *nonzero &= mask; return NULL; @@ -9852,16 +9858,8 @@ reg_nonzero_bits_for_combine (const_rtx x, machine return tem; } else if (nonzero_sign_valid && rsp->nonzero_bits) - { - unsigned HOST_WIDE_INT mask = rsp->nonzero_bits; + *nonzero &= rsp->nonzero_bits; - if (GET_MODE_PRECISION (GET_MODE (x)) < GET_MODE_PRECISION (mode)) - /* We don't know anything about the upper bits. 
*/ - mask |= GET_MODE_MASK (mode) ^ GET_MODE_MASK (GET_MODE (x)); - - *nonzero &= mask; - } - return NULL; } @@ -9883,7 +9881,11 @@ reg_num_sign_bit_copies_for_combine (const_rtx x, rsp = &reg_stat[REGNO (x)]; if (rsp->last_set_value != 0 - && rsp->last_set_mode == mode + && (rsp->last_set_mode == mode + || (GET_MODE_CLASS (rsp->last_set_mode) == MODE_INT + && GET_MODE_CLASS (mode) == MODE_INT + && (GET_MODE_PRECISION (mode) + <= GET_MODE_PRECISION (rsp->last_set_mode)))) && ((rsp->last_set_label >= label_tick_ebb_start && rsp->last_set_label < label_tick) || (rsp->last_set_label == label_tick @@ -9895,7 +9897,12 @@ reg_num_sign_bit_copies_for_combine (const_rtx x, (DF_LR_IN (ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb), REGNO (x))))) { - *result = rsp->last_set_sign_bit_copies; + int signbits = rsp->last_set_sign_bit_copies; + signbits -= (GET_MODE_PRECISION (rsp->last_set_mode) + - GET_MODE_PRECISION (mode)); + if (signbits <= 0) + signbits = 1; + *result = signbits; return NULL; } @@ -12716,9 +12723,26 @@ record_value_for_reg (rtx reg, rtx_insn *insn, rtx if (GET_MODE_CLASS (mode) == MODE_INT && HWI_COMPUTABLE_MODE_P (mode)) mode = nonzero_bits_mode; - rsp->last_set_nonzero_bits = nonzero_bits (value, mode); - rsp->last_set_sign_bit_copies - = num_sign_bit_copies (value, GET_MODE (reg)); + unsigned HOST_WIDE_INT nonzero = nonzero_bits (value, mode); +#if defined (WORD_REGISTER_OPERATIONS) && defined (EXTEND_OP) + /* Some operations might be known to zero extend to a wider mode. */ + if (GET_MODE_PRECISION (GET_MODE (reg)) < BITS_PER_WORD + && EXTEND_OP (value) == ZERO_EXTEND) + nonzero &= GET_MODE_MASK (GET_MODE (reg)); +#endif + rsp->last_set_nonzero_bits = nonzero; + unsigned int signbits = num_sign_bit_copies (value, GET_MODE (reg)); +#if defined (WORD_REGISTER_OPERATIONS) && defined (EXTEND_OP) + /* Some operations might be known to sign extend to a wider mode. 
*/ + if (GET_MODE_PRECISION (GET_MODE (reg)) < BITS_PER_WORD + && GET_MODE_CLASS (GET_MODE (reg)) == MODE_INT + && EXTEND_OP (value) == SIGN_EXTEND) + { + rsp->last_set_mode = word_mode; + signbits += BITS_PER_WORD - GET_MODE_PRECISION (GET_MODE (reg)); + } +#endif + rsp->last_set_sign_bit_copies = signbits; } } Index: config/rs6000/rs6000.h =================================================================== --- config/rs6000/rs6000.h (revision 223725) +++ config/rs6000/rs6000.h (working copy) @@ -2043,6 +2043,23 @@ do { \ on the full register even if a narrower mode is specified. */ #define WORD_REGISTER_OPERATIONS +/* Describe how rtl operations on registers behave on this target when + operating on less than the entire register. */ +#define EXTEND_OP(OP) \ + (GET_MODE (OP) != SImode \ + || !TARGET_POWERPC64 \ + ? UNKNOWN \ + : (GET_CODE (OP) == AND \ + || GET_CODE (OP) == ZERO_EXTEND \ + || GET_CODE (OP) == ASHIFT \ + || GET_CODE (OP) == ROTATE \ + || GET_CODE (OP) == LSHIFTRT) \ + ? ZERO_EXTEND \ + : (GET_CODE (OP) == SIGN_EXTEND \ + || GET_CODE (OP) == ASHIFTRT) \ + ? SIGN_EXTEND \ + : UNKNOWN) + /* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD will either zero-extend or sign-extend. The value of this macro should be the code that says which one of the two operations is implicitly -- Alan Modra Australia Development Lab, IBM