"H.J. Lu" <hjl.to...@gmail.com> writes: > diff --git a/gcc/builtins.c b/gcc/builtins.c > index 39ab139b7e1..1972301ce3c 100644 > --- a/gcc/builtins.c > +++ b/gcc/builtins.c > @@ -3890,13 +3890,16 @@ expand_builtin_strnlen (tree exp, rtx target, > machine_mode target_mode) > > static rtx > builtin_memcpy_read_str (void *data, void *, HOST_WIDE_INT offset, > - scalar_int_mode mode) > + fixed_size_mode mode) > { > /* The REPresentation pointed to by DATA need not be a nul-terminated > string but the caller guarantees it's large enough for MODE. */ > const char *rep = (const char *) data; > > - return c_readstr (rep + offset, mode, /*nul_terminated=*/false); > + /* NB: Vector mode in the by-pieces infrastructure is only used by > + the memset expander. */
Sorry to nitpick, but I guess this might get out out-of-date. Maybe: /* The by-pieces infrastructure does not try to pick a vector mode for memcpy expansion. */ > + return c_readstr (rep + offset, as_a <scalar_int_mode> (mode), > + /*nul_terminated=*/false); > } > > /* LEN specify length of the block of memcpy/memset operation. > @@ -6478,14 +6481,16 @@ expand_builtin_stpncpy (tree exp, rtx) > > rtx > builtin_strncpy_read_str (void *data, void *, HOST_WIDE_INT offset, > - scalar_int_mode mode) > + fixed_size_mode mode) > { > const char *str = (const char *) data; > > if ((unsigned HOST_WIDE_INT) offset > strlen (str)) > return const0_rtx; > > - return c_readstr (str + offset, mode); > + /* NB: Vector mode in the by-pieces infrastructure is only used by > + the memset expander. */ Similarly here for strncpy expansion. > + return c_readstr (str + offset, as_a <scalar_int_mode> (mode)); > } > > /* Helper to check the sizes of sequences and the destination of calls > @@ -6686,30 +6691,117 @@ expand_builtin_strncpy (tree exp, rtx target) > return NULL_RTX; > } > > -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) > - bytes from constant string DATA + OFFSET and return it as target > - constant. If PREV isn't nullptr, it has the RTL info from the > +/* Return the RTL of a register in MODE generated from PREV in the > previous iteration. */ > > -rtx > -builtin_memset_read_str (void *data, void *prevp, > - HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > - scalar_int_mode mode) > +static rtx > +gen_memset_value_from_prev (by_pieces_prev *prev, fixed_size_mode mode) > { > - by_pieces_prev *prev = (by_pieces_prev *) prevp; > + rtx target = nullptr; > if (prev != nullptr && prev->data != nullptr) > { > /* Use the previous data in the same mode. */ > if (prev->mode == mode) > return prev->data; > + > + fixed_size_mode prev_mode = prev->mode; > + > + /* Don't use the previous data to write QImode if it is in a > + vector mode. */ > + if (VECTOR_MODE_P (prev_mode) && mode == QImode) > + return target; > + > + rtx prev_rtx = prev->data; > + > + if (REG_P (prev_rtx) > + && HARD_REGISTER_P (prev_rtx) > + && lowpart_subreg_regno (REGNO (prev_rtx), prev_mode, mode) < 0) > + { > + /* If we can't put a hard register in MODE, first generate a > + subreg of word mode if the previous mode is wider than word > + mode and word mode is wider than MODE. */ > + if (UNITS_PER_WORD < GET_MODE_SIZE (prev_mode) > + && UNITS_PER_WORD > GET_MODE_SIZE (mode)) > + { > + prev_rtx = lowpart_subreg (word_mode, prev_rtx, > + prev_mode); > + if (prev_rtx != nullptr) > + prev_mode = word_mode; > + } > + else > + prev_rtx = nullptr; I don't understand this. Why not just do the: if (REG_P (prev_rtx) && HARD_REGISTER_P (prev_rtx) && lowpart_subreg_regno (REGNO (prev_rtx), prev_mode, mode) < 0) prev_rtx = copy_to_reg (prev_rtx); that I suggested in the previous review? IMO anything that relies on a sequence of two subreg operations is doing something wrong. > + } > + if (prev_rtx != nullptr) > + target = lowpart_subreg (mode, prev_rtx, prev_mode); > } > + return target; > +} > + > […] > @@ -769,21 +769,41 @@ alignment_for_piecewise_move (unsigned int max_pieces, > unsigned int align) > return align; > } > > -/* Return the widest integer mode that is narrower than SIZE bytes. */ > +/* Return the widest QI vector, if QI_MODE is true, or integer mode > + that is narrower than SIZE bytes. */ > > -static scalar_int_mode > -widest_int_mode_for_size (unsigned int size) > +static fixed_size_mode > +widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) > { > - scalar_int_mode result = NARROWEST_INT_MODE; > + machine_mode result = NARROWEST_INT_MODE; > > gcc_checking_assert (size > 1); > > + /* Use QI vector only if size is wider than a WORD. */ > + if (qi_vector && size > UNITS_PER_WORD) > + { > + machine_mode mode; > + FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) > + if (GET_MODE_INNER (mode) == QImode > + && GET_MODE_SIZE (mode).is_constant ()) > + { > + if (GET_MODE_SIZE (mode).to_constant () >= size) > + break; > + if (optab_handler (vec_duplicate_optab, mode) > + != CODE_FOR_nothing) > + result = mode; > + } > + > + if (result != NARROWEST_INT_MODE) > + return as_a <fixed_size_mode> (result); > + } > + > opt_scalar_int_mode tmode; > FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) > if (GET_MODE_SIZE (tmode.require ()) < size) > result = tmode.require (); > > - return result; > + return as_a <fixed_size_mode> (result); > } A less cast-heavy way to write this would be: fixed_size_mode result = NARROWEST_INT_MODE; gcc_checking_assert (size > 1); /* Use QI vector only if size is wider than a WORD. */ if (qi_vector && size > UNITS_PER_WORD) { machine_mode mode; fixed_size_mode candidate; FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) if (is_a<fixed_size_mode> (mode, &candidate) && GET_MODE_INNER (candidate) == QImode) { if (GET_MODE_SIZE (candidate) >= size) break; if (optab_handler (vec_duplicate_optab, candidate) != CODE_FOR_nothing) result = candidate; } if (result != NARROWEST_INT_MODE) return result; } opt_scalar_int_mode tmode; FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) if (GET_MODE_SIZE (tmode.require ()) < size) result = tmode.require (); return result; > @@ -1148,13 +1178,43 @@ op_by_pieces_d::get_usable_mode (scalar_int_mode > mode, unsigned int len) > size = GET_MODE_SIZE (mode); > if (len >= size && prepare_mode (mode, m_align)) > break; > - /* NB: widest_int_mode_for_size checks SIZE > 1. */ > - mode = widest_int_mode_for_size (size); > + /* NB: widest_fixed_size_mode_for_size checks SIZE > 1. */ > + mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); > } > while (1); > return mode; > } > > +/* Return the smallest integer or QI vector mode that is not narrower > + than SIZE bytes. */ > + > +fixed_size_mode > +op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) > +{ > + /* Use QI vector only for > size of WORD. */ > + if (m_qi_vector_mode && size > UNITS_PER_WORD) > + { > + machine_mode mode; > + FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) > + if (GET_MODE_INNER (mode) == QImode > + && GET_MODE_SIZE (mode).is_constant ()) > + { > + unsigned int mode_size = GET_MODE_SIZE (mode).to_constant (); > + > + /* NB: Don't return a mode wider than M_LEN. */ > + if (mode_size > m_len) > + break; > + > + if (mode_size >= size > + && (optab_handler (vec_duplicate_optab, mode) > + != CODE_FOR_nothing)) > + return as_a <fixed_size_mode> (mode); > + } > + } Same idea here. I realise this is pre-existing, but the heavy use of “NB:” seems a bit odd. I don't think it's something that the patch needs to adopt. Otherwise it looks good, thanks. I think the main sticking point is the subreg thing. Richard