Re: [PATCH] Rewrite memset expanders with vec_duplicate
On Fri, Jul 16, 2021 at 6:24 AM Richard Sandiford wrote: > > "H.J. Lu via Gcc-patches" writes: > > On Fri, Jul 16, 2021 at 4:38 AM Richard Sandiford > > wrote: > >> > >> "H.J. Lu via Gcc-patches" writes: > >> > 1. Rewrite builtin_memset_read_str and builtin_memset_gen_str with > >> > vec_duplicate_optab to duplicate QI value to TI/OI/XI value. > >> > 2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard > >> > scratch register to avoid stack realignment when expanding memset. > >> > > >> > PR middle-end/90773 > >> > * builtins.c (gen_memset_value_from_prev): New function. > >> > (gen_memset_broadcast): Likewise. > >> > (builtin_memset_read_str): Use gen_memset_value_from_prev > >> > and gen_memset_broadcast. > >> > (builtin_memset_gen_str): Likewise. > >> > * target.def (gen_memset_scratch_rtx): New hook. > >> > * doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX. > >> > * doc/tm.texi: Regenerated. > >> > --- > >> > gcc/builtins.c | 123 + > >> > gcc/doc/tm.texi| 5 ++ > >> > gcc/doc/tm.texi.in | 2 + > >> > gcc/target.def | 7 +++ > >> > 4 files changed, 116 insertions(+), 21 deletions(-) > >> > > >> > diff --git a/gcc/builtins.c b/gcc/builtins.c > >> > index 39ab139b7e1..c1758ae2efc 100644 > >> > --- a/gcc/builtins.c > >> > +++ b/gcc/builtins.c > >> > @@ -6686,26 +6686,111 @@ expand_builtin_strncpy (tree exp, rtx target) > >> >return NULL_RTX; > >> > } > >> > > >> > -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) > >> > - bytes from constant string DATA + OFFSET and return it as target > >> > - constant. If PREV isn't nullptr, it has the RTL info from the > >> > +/* Return the RTL of a register in MODE generated from PREV in the > >> > previous iteration. */ > >> > > >> > -rtx > >> > -builtin_memset_read_str (void *data, void *prevp, > >> > - HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > >> > - scalar_int_mode mode) > >> > +static rtx > >> > +gen_memset_value_from_prev (void *prevp, scalar_int_mode mode) > >> > { > >> > + rtx target = nullptr; > >> >by_pieces_prev *prev = (by_pieces_prev *) prevp; > >> >if (prev != nullptr && prev->data != nullptr) > >> > { > >> >/* Use the previous data in the same mode. */ > >> >if (prev->mode == mode) > >> > return prev->data; > >> > + > >> > + rtx prev_rtx = prev->data; > >> > + machine_mode prev_mode = prev->mode; > >> > + unsigned int word_size = GET_MODE_SIZE (word_mode); > >> > + if (word_size < GET_MODE_SIZE (prev->mode) > >> > + && word_size > GET_MODE_SIZE (mode)) > >> > + { > >> > + /* First generate subreg of word mode if the previous mode is > >> > + wider than word mode and word mode is wider than MODE. */ > >> > + prev_rtx = simplify_gen_subreg (word_mode, prev_rtx, > >> > + prev_mode, 0); > >> > + prev_mode = word_mode; > >> > + } > >> > + if (prev_rtx != nullptr) > >> > + target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0); > >> > } > >> > + return target; > >> > +} > >> > + > >> > +/* Return the RTL of a register in MODE broadcasted from DATA. */ > >> > + > >> > +static rtx > >> > +gen_memset_broadcast (rtx data, scalar_int_mode mode) > >> > +{ > >> > + /* Skip if regno_reg_rtx isn't initialized. */ > >> > + if (!regno_reg_rtx) > >> > +return nullptr; > >> > + > >> > + rtx target = nullptr; > >> > + > >> > + unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode); > >> > + machine_mode vector_mode; > >> > + if (!mode_for_vector (QImode, nunits).exists (_mode)) > >> > +gcc_unreachable (); > >> > >> Sorry, I realise it's a bit late to be raising this objection now, > >> but I don't think it's a good idea to use scalar integer modes as > >> a proxy for vector modes. In principle there's no reason why a > >> target has to define an integer mode for every vector mode. > > > > A target always defines the largest integer mode. > > Right. But a target shouldn't *need* to define an integer mode > for every vector mode. > > >> If we want the mode to be a vector then I think the by-pieces > >> infrastructure should be extended to support vectors directly, > >> rather than assuming that each piece can be represented as > >> a scalar_int_mode. > >> > > > > The current by-pieces infrastructure operates on scalar_int_mode. > > Only for memset, there is > > > > /* Callback routine for store_by_pieces. Return the RTL of a register > >containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned > >char value given in the RTL register data. For example, if mode is > >4 bytes wide, return the RTL for 0x01010101*data. If PREV isn't > >nullptr, it has the RTL info from the previous iteration. */ > > > > static rtx > > builtin_memset_gen_str (void
Re: [PATCH] Rewrite memset expanders with vec_duplicate
"H.J. Lu via Gcc-patches" writes: > On Fri, Jul 16, 2021 at 4:38 AM Richard Sandiford > wrote: >> >> "H.J. Lu via Gcc-patches" writes: >> > 1. Rewrite builtin_memset_read_str and builtin_memset_gen_str with >> > vec_duplicate_optab to duplicate QI value to TI/OI/XI value. >> > 2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard >> > scratch register to avoid stack realignment when expanding memset. >> > >> > PR middle-end/90773 >> > * builtins.c (gen_memset_value_from_prev): New function. >> > (gen_memset_broadcast): Likewise. >> > (builtin_memset_read_str): Use gen_memset_value_from_prev >> > and gen_memset_broadcast. >> > (builtin_memset_gen_str): Likewise. >> > * target.def (gen_memset_scratch_rtx): New hook. >> > * doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX. >> > * doc/tm.texi: Regenerated. >> > --- >> > gcc/builtins.c | 123 + >> > gcc/doc/tm.texi| 5 ++ >> > gcc/doc/tm.texi.in | 2 + >> > gcc/target.def | 7 +++ >> > 4 files changed, 116 insertions(+), 21 deletions(-) >> > >> > diff --git a/gcc/builtins.c b/gcc/builtins.c >> > index 39ab139b7e1..c1758ae2efc 100644 >> > --- a/gcc/builtins.c >> > +++ b/gcc/builtins.c >> > @@ -6686,26 +6686,111 @@ expand_builtin_strncpy (tree exp, rtx target) >> >return NULL_RTX; >> > } >> > >> > -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) >> > - bytes from constant string DATA + OFFSET and return it as target >> > - constant. If PREV isn't nullptr, it has the RTL info from the >> > +/* Return the RTL of a register in MODE generated from PREV in the >> > previous iteration. */ >> > >> > -rtx >> > -builtin_memset_read_str (void *data, void *prevp, >> > - HOST_WIDE_INT offset ATTRIBUTE_UNUSED, >> > - scalar_int_mode mode) >> > +static rtx >> > +gen_memset_value_from_prev (void *prevp, scalar_int_mode mode) >> > { >> > + rtx target = nullptr; >> >by_pieces_prev *prev = (by_pieces_prev *) prevp; >> >if (prev != nullptr && prev->data != nullptr) >> > { >> >/* Use the previous data in the same mode. */ >> >if (prev->mode == mode) >> > return prev->data; >> > + >> > + rtx prev_rtx = prev->data; >> > + machine_mode prev_mode = prev->mode; >> > + unsigned int word_size = GET_MODE_SIZE (word_mode); >> > + if (word_size < GET_MODE_SIZE (prev->mode) >> > + && word_size > GET_MODE_SIZE (mode)) >> > + { >> > + /* First generate subreg of word mode if the previous mode is >> > + wider than word mode and word mode is wider than MODE. */ >> > + prev_rtx = simplify_gen_subreg (word_mode, prev_rtx, >> > + prev_mode, 0); >> > + prev_mode = word_mode; >> > + } >> > + if (prev_rtx != nullptr) >> > + target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0); >> > } >> > + return target; >> > +} >> > + >> > +/* Return the RTL of a register in MODE broadcasted from DATA. */ >> > + >> > +static rtx >> > +gen_memset_broadcast (rtx data, scalar_int_mode mode) >> > +{ >> > + /* Skip if regno_reg_rtx isn't initialized. */ >> > + if (!regno_reg_rtx) >> > +return nullptr; >> > + >> > + rtx target = nullptr; >> > + >> > + unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode); >> > + machine_mode vector_mode; >> > + if (!mode_for_vector (QImode, nunits).exists (_mode)) >> > +gcc_unreachable (); >> >> Sorry, I realise it's a bit late to be raising this objection now, >> but I don't think it's a good idea to use scalar integer modes as >> a proxy for vector modes. In principle there's no reason why a >> target has to define an integer mode for every vector mode. > > A target always defines the largest integer mode. Right. But a target shouldn't *need* to define an integer mode for every vector mode. >> If we want the mode to be a vector then I think the by-pieces >> infrastructure should be extended to support vectors directly, >> rather than assuming that each piece can be represented as >> a scalar_int_mode. >> > > The current by-pieces infrastructure operates on scalar_int_mode. > Only for memset, there is > > /* Callback routine for store_by_pieces. Return the RTL of a register >containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned >char value given in the RTL register data. For example, if mode is >4 bytes wide, return the RTL for 0x01010101*data. If PREV isn't >nullptr, it has the RTL info from the previous iteration. */ > > static rtx > builtin_memset_gen_str (void *data, void *prevp, > HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > scalar_int_mode mode) > > It is a broadcast. If a target can broadcast a byte to a wider integer, > can you suggest a way to use it in the current by-pieces infrastructure? I
Re: [PATCH] Rewrite memset expanders with vec_duplicate
On Fri, Jul 16, 2021 at 4:38 AM Richard Sandiford wrote: > > "H.J. Lu via Gcc-patches" writes: > > 1. Rewrite builtin_memset_read_str and builtin_memset_gen_str with > > vec_duplicate_optab to duplicate QI value to TI/OI/XI value. > > 2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard > > scratch register to avoid stack realignment when expanding memset. > > > > PR middle-end/90773 > > * builtins.c (gen_memset_value_from_prev): New function. > > (gen_memset_broadcast): Likewise. > > (builtin_memset_read_str): Use gen_memset_value_from_prev > > and gen_memset_broadcast. > > (builtin_memset_gen_str): Likewise. > > * target.def (gen_memset_scratch_rtx): New hook. > > * doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX. > > * doc/tm.texi: Regenerated. > > --- > > gcc/builtins.c | 123 + > > gcc/doc/tm.texi| 5 ++ > > gcc/doc/tm.texi.in | 2 + > > gcc/target.def | 7 +++ > > 4 files changed, 116 insertions(+), 21 deletions(-) > > > > diff --git a/gcc/builtins.c b/gcc/builtins.c > > index 39ab139b7e1..c1758ae2efc 100644 > > --- a/gcc/builtins.c > > +++ b/gcc/builtins.c > > @@ -6686,26 +6686,111 @@ expand_builtin_strncpy (tree exp, rtx target) > >return NULL_RTX; > > } > > > > -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) > > - bytes from constant string DATA + OFFSET and return it as target > > - constant. If PREV isn't nullptr, it has the RTL info from the > > +/* Return the RTL of a register in MODE generated from PREV in the > > previous iteration. */ > > > > -rtx > > -builtin_memset_read_str (void *data, void *prevp, > > - HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > > - scalar_int_mode mode) > > +static rtx > > +gen_memset_value_from_prev (void *prevp, scalar_int_mode mode) > > { > > + rtx target = nullptr; > >by_pieces_prev *prev = (by_pieces_prev *) prevp; > >if (prev != nullptr && prev->data != nullptr) > > { > >/* Use the previous data in the same mode. */ > >if (prev->mode == mode) > > return prev->data; > > + > > + rtx prev_rtx = prev->data; > > + machine_mode prev_mode = prev->mode; > > + unsigned int word_size = GET_MODE_SIZE (word_mode); > > + if (word_size < GET_MODE_SIZE (prev->mode) > > + && word_size > GET_MODE_SIZE (mode)) > > + { > > + /* First generate subreg of word mode if the previous mode is > > + wider than word mode and word mode is wider than MODE. */ > > + prev_rtx = simplify_gen_subreg (word_mode, prev_rtx, > > + prev_mode, 0); > > + prev_mode = word_mode; > > + } > > + if (prev_rtx != nullptr) > > + target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0); > > } > > + return target; > > +} > > + > > +/* Return the RTL of a register in MODE broadcasted from DATA. */ > > + > > +static rtx > > +gen_memset_broadcast (rtx data, scalar_int_mode mode) > > +{ > > + /* Skip if regno_reg_rtx isn't initialized. */ > > + if (!regno_reg_rtx) > > +return nullptr; > > + > > + rtx target = nullptr; > > + > > + unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode); > > + machine_mode vector_mode; > > + if (!mode_for_vector (QImode, nunits).exists (_mode)) > > +gcc_unreachable (); > > Sorry, I realise it's a bit late to be raising this objection now, > but I don't think it's a good idea to use scalar integer modes as > a proxy for vector modes. In principle there's no reason why a > target has to define an integer mode for every vector mode. A target always defines the largest integer mode. > If we want the mode to be a vector then I think the by-pieces > infrastructure should be extended to support vectors directly, > rather than assuming that each piece can be represented as > a scalar_int_mode. > The current by-pieces infrastructure operates on scalar_int_mode. Only for memset, there is /* Callback routine for store_by_pieces. Return the RTL of a register containing GET_MODE_SIZE (MODE) consecutive copies of the unsigned char value given in the RTL register data. For example, if mode is 4 bytes wide, return the RTL for 0x01010101*data. If PREV isn't nullptr, it has the RTL info from the previous iteration. */ static rtx builtin_memset_gen_str (void *data, void *prevp, HOST_WIDE_INT offset ATTRIBUTE_UNUSED, scalar_int_mode mode) It is a broadcast. If a target can broadcast a byte to a wider integer, can you suggest a way to use it in the current by-pieces infrastructure? Thanks. > Thanks, > Richard > > > + > > + enum insn_code icode = optab_handler (vec_duplicate_optab, > > + vector_mode); > > + if (icode != CODE_FOR_nothing) > > +{ > > + rtx reg = targetm.gen_memset_scratch_rtx
Re: [PATCH] Rewrite memset expanders with vec_duplicate
"H.J. Lu via Gcc-patches" writes: > 1. Rewrite builtin_memset_read_str and builtin_memset_gen_str with > vec_duplicate_optab to duplicate QI value to TI/OI/XI value. > 2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard > scratch register to avoid stack realignment when expanding memset. > > PR middle-end/90773 > * builtins.c (gen_memset_value_from_prev): New function. > (gen_memset_broadcast): Likewise. > (builtin_memset_read_str): Use gen_memset_value_from_prev > and gen_memset_broadcast. > (builtin_memset_gen_str): Likewise. > * target.def (gen_memset_scratch_rtx): New hook. > * doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX. > * doc/tm.texi: Regenerated. > --- > gcc/builtins.c | 123 + > gcc/doc/tm.texi| 5 ++ > gcc/doc/tm.texi.in | 2 + > gcc/target.def | 7 +++ > 4 files changed, 116 insertions(+), 21 deletions(-) > > diff --git a/gcc/builtins.c b/gcc/builtins.c > index 39ab139b7e1..c1758ae2efc 100644 > --- a/gcc/builtins.c > +++ b/gcc/builtins.c > @@ -6686,26 +6686,111 @@ expand_builtin_strncpy (tree exp, rtx target) >return NULL_RTX; > } > > -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) > - bytes from constant string DATA + OFFSET and return it as target > - constant. If PREV isn't nullptr, it has the RTL info from the > +/* Return the RTL of a register in MODE generated from PREV in the > previous iteration. */ > > -rtx > -builtin_memset_read_str (void *data, void *prevp, > - HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > - scalar_int_mode mode) > +static rtx > +gen_memset_value_from_prev (void *prevp, scalar_int_mode mode) > { > + rtx target = nullptr; >by_pieces_prev *prev = (by_pieces_prev *) prevp; >if (prev != nullptr && prev->data != nullptr) > { >/* Use the previous data in the same mode. */ >if (prev->mode == mode) > return prev->data; > + > + rtx prev_rtx = prev->data; > + machine_mode prev_mode = prev->mode; > + unsigned int word_size = GET_MODE_SIZE (word_mode); > + if (word_size < GET_MODE_SIZE (prev->mode) > + && word_size > GET_MODE_SIZE (mode)) > + { > + /* First generate subreg of word mode if the previous mode is > + wider than word mode and word mode is wider than MODE. */ > + prev_rtx = simplify_gen_subreg (word_mode, prev_rtx, > + prev_mode, 0); > + prev_mode = word_mode; > + } > + if (prev_rtx != nullptr) > + target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0); > } > + return target; > +} > + > +/* Return the RTL of a register in MODE broadcasted from DATA. */ > + > +static rtx > +gen_memset_broadcast (rtx data, scalar_int_mode mode) > +{ > + /* Skip if regno_reg_rtx isn't initialized. */ > + if (!regno_reg_rtx) > +return nullptr; > + > + rtx target = nullptr; > + > + unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode); > + machine_mode vector_mode; > + if (!mode_for_vector (QImode, nunits).exists (_mode)) > +gcc_unreachable (); Sorry, I realise it's a bit late to be raising this objection now, but I don't think it's a good idea to use scalar integer modes as a proxy for vector modes. In principle there's no reason why a target has to define an integer mode for every vector mode. If we want the mode to be a vector then I think the by-pieces infrastructure should be extended to support vectors directly, rather than assuming that each piece can be represented as a scalar_int_mode. Thanks, Richard > + > + enum insn_code icode = optab_handler (vec_duplicate_optab, > + vector_mode); > + if (icode != CODE_FOR_nothing) > +{ > + rtx reg = targetm.gen_memset_scratch_rtx (vector_mode); > + if (CONST_INT_P (data)) > + { > + /* Use the move expander with CONST_VECTOR. */ > + rtx const_vec = gen_const_vec_duplicate (vector_mode, data); > + emit_move_insn (reg, const_vec); > + } > + else > + { > + > + class expand_operand ops[2]; > + create_output_operand ([0], reg, vector_mode); > + create_input_operand ([1], data, QImode); > + expand_insn (icode, 2, ops); > + if (!rtx_equal_p (reg, ops[0].value)) > + emit_move_insn (reg, ops[0].value); > + } > + target = lowpart_subreg (mode, reg, vector_mode); > +} > + > + return target; > +} > + > +/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) > + bytes from constant string DATA + OFFSET and return it as target > + constant. If PREV isn't nullptr, it has the RTL info from the > + previous iteration. */ > > +rtx > +builtin_memset_read_str (void *data, void *prev, > + HOST_WIDE_INT offset ATTRIBUTE_UNUSED, > + scalar_int_mode
[PATCH] Rewrite memset expanders with vec_duplicate
1. Rewrite builtin_memset_read_str and builtin_memset_gen_str with vec_duplicate_optab to duplicate QI value to TI/OI/XI value. 2. Add TARGET_GEN_MEMSET_SCRATCH_RTX to allow the backend to use a hard scratch register to avoid stack realignment when expanding memset. PR middle-end/90773 * builtins.c (gen_memset_value_from_prev): New function. (gen_memset_broadcast): Likewise. (builtin_memset_read_str): Use gen_memset_value_from_prev and gen_memset_broadcast. (builtin_memset_gen_str): Likewise. * target.def (gen_memset_scratch_rtx): New hook. * doc/tm.texi.in: Add TARGET_GEN_MEMSET_SCRATCH_RTX. * doc/tm.texi: Regenerated. --- gcc/builtins.c | 123 + gcc/doc/tm.texi| 5 ++ gcc/doc/tm.texi.in | 2 + gcc/target.def | 7 +++ 4 files changed, 116 insertions(+), 21 deletions(-) diff --git a/gcc/builtins.c b/gcc/builtins.c index 39ab139b7e1..c1758ae2efc 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -6686,26 +6686,111 @@ expand_builtin_strncpy (tree exp, rtx target) return NULL_RTX; } -/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) - bytes from constant string DATA + OFFSET and return it as target - constant. If PREV isn't nullptr, it has the RTL info from the +/* Return the RTL of a register in MODE generated from PREV in the previous iteration. */ -rtx -builtin_memset_read_str (void *data, void *prevp, -HOST_WIDE_INT offset ATTRIBUTE_UNUSED, -scalar_int_mode mode) +static rtx +gen_memset_value_from_prev (void *prevp, scalar_int_mode mode) { + rtx target = nullptr; by_pieces_prev *prev = (by_pieces_prev *) prevp; if (prev != nullptr && prev->data != nullptr) { /* Use the previous data in the same mode. */ if (prev->mode == mode) return prev->data; + + rtx prev_rtx = prev->data; + machine_mode prev_mode = prev->mode; + unsigned int word_size = GET_MODE_SIZE (word_mode); + if (word_size < GET_MODE_SIZE (prev->mode) + && word_size > GET_MODE_SIZE (mode)) + { + /* First generate subreg of word mode if the previous mode is +wider than word mode and word mode is wider than MODE. */ + prev_rtx = simplify_gen_subreg (word_mode, prev_rtx, + prev_mode, 0); + prev_mode = word_mode; + } + if (prev_rtx != nullptr) + target = simplify_gen_subreg (mode, prev_rtx, prev_mode, 0); } + return target; +} + +/* Return the RTL of a register in MODE broadcasted from DATA. */ + +static rtx +gen_memset_broadcast (rtx data, scalar_int_mode mode) +{ + /* Skip if regno_reg_rtx isn't initialized. */ + if (!regno_reg_rtx) +return nullptr; + + rtx target = nullptr; + + unsigned int nunits = GET_MODE_SIZE (mode) / GET_MODE_SIZE (QImode); + machine_mode vector_mode; + if (!mode_for_vector (QImode, nunits).exists (_mode)) +gcc_unreachable (); + + enum insn_code icode = optab_handler (vec_duplicate_optab, + vector_mode); + if (icode != CODE_FOR_nothing) +{ + rtx reg = targetm.gen_memset_scratch_rtx (vector_mode); + if (CONST_INT_P (data)) + { + /* Use the move expander with CONST_VECTOR. */ + rtx const_vec = gen_const_vec_duplicate (vector_mode, data); + emit_move_insn (reg, const_vec); + } + else + { + + class expand_operand ops[2]; + create_output_operand ([0], reg, vector_mode); + create_input_operand ([1], data, QImode); + expand_insn (icode, 2, ops); + if (!rtx_equal_p (reg, ops[0].value)) + emit_move_insn (reg, ops[0].value); + } + target = lowpart_subreg (mode, reg, vector_mode); +} + + return target; +} + +/* Callback routine for store_by_pieces. Read GET_MODE_BITSIZE (MODE) + bytes from constant string DATA + OFFSET and return it as target + constant. If PREV isn't nullptr, it has the RTL info from the + previous iteration. */ +rtx +builtin_memset_read_str (void *data, void *prev, +HOST_WIDE_INT offset ATTRIBUTE_UNUSED, +scalar_int_mode mode) +{ + rtx target; const char *c = (const char *) data; - char *p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); + char *p; + + /* Don't use the previous value if size is 1. */ + if (GET_MODE_SIZE (mode) != 1) +{ + target = gen_memset_value_from_prev (prev, mode); + if (target != nullptr) + return target; + + p = XALLOCAVEC (char, GET_MODE_SIZE (QImode)); + memset (p, *c, GET_MODE_SIZE (QImode)); + rtx src = c_readstr (p, QImode); + target = gen_memset_broadcast (src, mode); + if (target != nullptr) + return target; +} + + p = XALLOCAVEC (char, GET_MODE_SIZE (mode)); memset (p, *c, GET_MODE_SIZE (mode));