https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108803
--- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
The change would then be:

--- gcc/optabs.cc.jj 2023-01-02 09:32:53.309838465 +0100
+++ gcc/optabs.cc 2023-02-16 19:33:14.583883584 +0100
@@ -507,7 +507,7 @@ expand_subword_shift (scalar_int_mode op
                       rtx outof_input, rtx into_input, rtx op1,
                       rtx outof_target, rtx into_target,
                       int unsignedp, enum optab_methods methods,
-                      unsigned HOST_WIDE_INT shift_mask)
+                      unsigned HOST_WIDE_INT shift_mask, bool mask_count)
 {
   optab reverse_unsigned_shift, unsigned_shift;
   rtx tmp, carries;
@@ -526,6 +526,23 @@ expand_subword_shift (scalar_int_mode op
       tmp = simplify_expand_binop (op1_mode, sub_optab, tmp, op1,
                                    0, true, methods);
     }
+  else if (mask_count)
+    {
+      /* When called from expand_doubleword_shift_condmove with shift_mask 0,
+         we need to mask the shift count (and on some targets have that later
+         be combined with shifts into a single instruction). In that case
+         we can avoid the separate shift by 1 and another by
+         (BITS_PER_WORD - 1) - op1 and can just do one shift by
+         -op1 & (BITS_PER_WORD - 1). */
+      carries = outof_input;
+      tmp = expand_unop (op1_mode, neg_optab, op1, 0, false);
+      rtx tmp2 = immed_wide_int_const (wi::shwi (BITS_PER_WORD - 1,
+                                                 op1_mode), op1_mode);
+      tmp = simplify_expand_binop (op1_mode, and_optab, tmp, tmp2, 0, true,
+                                   methods);
+      op1 = simplify_expand_binop (op1_mode, and_optab, op1, tmp2, 0, true,
+                                   methods);
+    }
   else
     {
       /* We must avoid shifting by BITS_PER_WORD bits since that is either
@@ -596,6 +613,15 @@ expand_doubleword_shift_condmove (scalar
 {
   rtx outof_superword, into_superword;
 
+  if (shift_mask < BITS_PER_WORD - 1)
+    {
+      rtx tmp = immed_wide_int_const (wi::shwi (BITS_PER_WORD - 1, op1_mode),
+                                      op1_mode);
+      superword_op1
+        = simplify_expand_binop (op1_mode, and_optab, superword_op1, tmp,
+                                 0, true, methods);
+    }
+
   /* Put the superword version of the output into OUTOF_SUPERWORD and
      INTO_SUPERWORD. */
   outof_superword = outof_target != 0 ? gen_reg_rtx (word_mode) : 0;
@@ -621,7 +647,8 @@ expand_doubleword_shift_condmove (scalar
   if (!expand_subword_shift (op1_mode, binoptab,
                              outof_input, into_input, subword_op1,
                              outof_target, into_target,
-                             unsignedp, methods, shift_mask))
+                             unsignedp, methods, shift_mask,
+                             shift_mask < BITS_PER_WORD - 1))
     return false;
 
   /* Select between them. Do the INTO half first because INTO_SUPERWORD
@@ -742,7 +769,7 @@ expand_doubleword_shift (scalar_int_mode
         return expand_subword_shift (op1_mode, binoptab,
                                      outof_input, into_input, op1,
                                      outof_target, into_target,
-                                     unsignedp, methods, shift_mask);
+                                     unsignedp, methods, shift_mask, false);
     }
 
   /* Try using conditional moves to generate straight-line code. */
@@ -781,7 +808,7 @@ expand_doubleword_shift (scalar_int_mode
   if (!expand_subword_shift (op1_mode, binoptab,
                              outof_input, into_input, op1,
                              outof_target, into_target,
-                             unsignedp, methods, shift_mask))
+                             unsignedp, methods, shift_mask, false))
     return false;
 
   emit_label (done_label);

or so, and it emits one fewer instruction for foo and bar as before. But somehow, with this patch applied, the testcase from comment #0 aborts again, so something is not right...