On Mon, 4 Dec 2023, Robin Dapp wrote:

> Hi,
> 
> this changes the vec_extract path of extract_bit_field to use QImode
> instead of BImode when extracting from mask vectors and changes
> GET_MODE_BITSIZE to GET_MODE_PRECISION.  This fixes an ICE on riscv
> where we did not find a vec_extract optab and continued with the generic
> code that requires 1-byte alignment that riscv mask modes do not
> provide.  Using QImode extraction makes this piece of code
> behave similarly as vectorizable_live_operation where we create
> a VEC_EXTRACT whose extraction mode expand_convert_optab_fn converts
> from <signed-boolean:1> to QImode by TYPE_MODE.
> 
> Apart from that it adds poly_int support to riscv's vec_extract
> expander and makes the RVV..BImode -> QImode expander call
> emit_vec_extract in order to not duplicate code.
> 
> Bootstrapped and regtested on aarch64 and x86.  Regtested on
> riscv64, still running on riscv32.
> 
> Regards
>  Robin
> 
> gcc/testsuite/ChangeLog:
> 
>       * gcc.target/riscv/rvv/autovec/partial/pr112773.c: New test.
> ---
>  gcc/config/riscv/autovec.md                   | 35 ++++++++++---------
>  gcc/config/riscv/riscv-protos.h               |  3 +-
>  gcc/config/riscv/riscv-v.cc                   | 14 ++++----
>  gcc/config/riscv/riscv.cc                     |  6 ++--
>  gcc/expmed.cc                                 | 24 ++++++++-----
>  .../riscv/rvv/autovec/partial/pr112773.c      | 20 +++++++++++
>  6 files changed, 68 insertions(+), 34 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/pr112773.c
> 
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index 2d727c2609b..3c4d68367f0 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -1380,12 +1380,23 @@ (define_expand "vec_extract<mode><vel>"
>    rtx tmp = NULL_RTX;
>    if (operands[2] != const0_rtx)
>      {
> -      /* Emit the slide down to index 0 in a new vector.  */
> -      tmp = gen_reg_rtx (<MODE>mode);
> -      operands[2] = gen_lowpart (Pmode, operands[2]);
> -      rtx ops[] = {tmp, operands[1], operands[2]};
> -      riscv_vector::emit_vlmax_insn
> -     (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode), 
> riscv_vector::BINARY_OP, ops);
> +      /* Properly convert a poly_int value and put the result into a
> +      register.  */
> +      if (CONST_POLY_INT_P (operands[2]))
> +     {
> +       rtx pos = gen_reg_rtx (Pmode);
> +       riscv_legitimize_poly_move (Pmode, pos, gen_reg_rtx (Pmode),
> +                                   operands[2]);
> +       operands[2] = pos;
> +     }
> +
> +    /* Emit the slide down to index 0 in a new vector.  */
> +    tmp = gen_reg_rtx (<MODE>mode);
> +    operands[2] = gen_lowpart (Pmode, operands[2]);
> +    rtx ops[] = {tmp, operands[1], operands[2]};
> +    riscv_vector::emit_vlmax_insn
> +      (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode),
> +       riscv_vector::BINARY_OP, ops);
>      }
>  
>    /* Emit v(f)mv.[xf].s.  */
> @@ -1417,16 +1428,8 @@ (define_expand "vec_extract<mode>qi"
>    riscv_vector::emit_vlmax_insn (code_for_pred_merge (qimode),
>                                riscv_vector::MERGE_OP, ops1);
>  
> -  /* Slide down the requested byte element.  */
> -  rtx tmp2 = gen_reg_rtx (qimode);
> -
> -  rtx ops2[] = {tmp2, tmp1, operands[2]};
> -  riscv_vector::emit_vlmax_insn
> -    (code_for_pred_slide (UNSPEC_VSLIDEDOWN, qimode),
> -     riscv_vector::BINARY_OP, ops2);
> -
> -  /* Extract it.  */
> -  emit_insn (gen_pred_extract_first (qimode, operands[0], tmp2));
> +  /* Extract from it.  */
> +  riscv_vector::emit_vec_extract (operands[0], tmp1, operands[2]);
>    DONE;
>  })
>  
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 695ee24ad6f..c02de84d6ef 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -129,6 +129,7 @@ extern void riscv_asm_output_alias (FILE *, const tree, 
> const tree);
>  extern void riscv_asm_output_external (FILE *, const tree, const char *);
>  extern bool
>  riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
> +extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
>  
>  #ifdef RTX_CODE
>  extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool 
> *invert_ptr = 0);
> @@ -558,7 +559,7 @@ void expand_cond_binop (unsigned, rtx *);
>  void expand_cond_ternop (unsigned, rtx *);
>  void expand_popcount (rtx *);
>  void expand_rawmemchr (machine_mode, rtx, rtx, rtx);
> -void emit_vec_extract (rtx, rtx, poly_int64);
> +void emit_vec_extract (rtx, rtx, rtx);
>  
>  /* Rounding mode bitfield for fixed point VXRM.  */
>  enum fixed_point_rounding_mode
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 588c127343e..430aae3dc69 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -3253,7 +3253,7 @@ shuffle_extract_and_slide1up_patterns (struct 
> expand_vec_perm_d *d)
>    /* Extract the last element of the first vector.  */
>    scalar_mode smode = GET_MODE_INNER (d->vmode);
>    rtx tmp = gen_reg_rtx (smode);
> -  emit_vec_extract (tmp, d->op0, nunits - 1);
> +  emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
>  
>    /* Insert the scalar into element 0.  */
>    unsigned int unspec
> @@ -4664,9 +4664,8 @@ can_be_broadcasted_p (rtx op)
>    return can_create_pseudo_p () && nonmemory_operand (op, mode);
>  }
>  
> -/* Helper function to emit vec_extract_optab.  */
>  void
> -emit_vec_extract (rtx target, rtx src, poly_int64 index)
> +emit_vec_extract (rtx target, rtx src, rtx index)
>  {
>    machine_mode vmode = GET_MODE (src);
>    machine_mode smode = GET_MODE (target);
> @@ -4677,10 +4676,13 @@ emit_vec_extract (rtx target, rtx src, poly_int64 
> index)
>    create_output_operand (&ops[0], target, smode);
>    ops[0].target = 1;
>    create_input_operand (&ops[1], src, vmode);
> -  if (index.is_constant ())
> -    create_integer_operand (&ops[2], index);
> +
> +  poly_int64 val;
> +  if (poly_int_rtx_p (index, &val))
> +    create_integer_operand (&ops[2], val);
>    else
> -    create_input_operand (&ops[2], gen_int_mode (index, Pmode), Pmode);
> +    create_input_operand (&ops[2], index, Pmode);
> +
>    expand_insn (icode, 3, ops);
>    if (ops[0].value != target)
>      emit_move_insn (target, ops[0].value);
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 3f111fa0393..84771ee98bc 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -431,7 +431,6 @@ static const struct riscv_tune_param 
> optimize_size_tune_info = {
>  static bool riscv_avoid_shrink_wrapping_separate ();
>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
>  static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
> -static void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
>  
>  /* Defining target-specific uses of __attribute__.  */
>  TARGET_GNU_ATTRIBUTES (riscv_attribute_table,
> @@ -2422,7 +2421,7 @@ riscv_expand_mult_with_const_int (machine_mode mode, 
> rtx dest, rtx multiplicand,
>  
>  /* Analyze src and emit const_poly_int mov sequence.  */
>  
> -static void
> +void
>  riscv_legitimize_poly_move (machine_mode mode, rtx dest, rtx tmp, rtx src)
>  {
>    poly_int64 value = rtx_to_poly_int64 (src);
> @@ -2637,7 +2636,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx 
> src)
>             else
>               result = gen_reg_rtx (smode);
>  
> -           riscv_vector::emit_vec_extract (result, v, index + i);
> +           riscv_vector::emit_vec_extract (result, v,
> +                                           gen_int_mode (index + i, Pmode));
>  
>             if (i == 1)
>               {
> diff --git a/gcc/expmed.cc b/gcc/expmed.cc
> index b294eabb08d..b59dc91ffed 100644
> --- a/gcc/expmed.cc
> +++ b/gcc/expmed.cc
> @@ -772,8 +772,8 @@ store_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
> poly_uint64 bitnum,
>        && !MEM_P (op0)
>        && optab_handler (vec_set_optab, outermode) != CODE_FOR_nothing
>        && fieldmode == innermode
> -      && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
> -      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
> +      && known_eq (bitsize, GET_MODE_PRECISION (innermode))
> +      && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos))
>      {
>        class expand_operand ops[3];
>        enum insn_code icode = optab_handler (vec_set_optab, outermode);
> @@ -1675,7 +1675,7 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, 
> poly_uint64 bitnum,
>    if (VECTOR_MODE_P (GET_MODE (op0))
>        && !MEM_P (op0)
>        && VECTOR_MODE_P (tmode)
> -      && known_eq (bitsize, GET_MODE_BITSIZE (tmode))
> +      && known_eq (bitsize, GET_MODE_PRECISION (tmode))
>        && maybe_gt (GET_MODE_SIZE (GET_MODE (op0)), GET_MODE_SIZE (tmode)))
>      {
>        machine_mode new_mode = GET_MODE (op0);
> @@ -1758,16 +1758,24 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 
> bitsize, poly_uint64 bitnum,
>    if (VECTOR_MODE_P (outermode) && !MEM_P (op0))
>      {
>        scalar_mode innermode = GET_MODE_INNER (outermode);
> +
> +      /* If we extract from a mask vector extract via QImode instead
> +      of BImode.  */
> +      scalar_mode extract_mode = innermode;
> +      if (GET_MODE_CLASS (outermode) == MODE_VECTOR_BOOL)
> +     extract_mode = QImode;
> +

But how do we know BI<N>mode fits in QImode?

I think the issue is more that we try to extract an element from
the mask vector?  How is element extraction defined for VLA vectors
anyway?  How can we be sure to not access out-of-bounds?

Richard.

>        enum insn_code icode
> -     = convert_optab_handler (vec_extract_optab, outermode, innermode);
> +     = convert_optab_handler (vec_extract_optab, outermode, extract_mode);
> +
>        poly_uint64 pos;
> -      if (icode != CODE_FOR_nothing
> -       && known_eq (bitsize, GET_MODE_BITSIZE (innermode))
> -       && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
> +      if ((icode != CODE_FOR_nothing
> +        && known_eq (bitsize, GET_MODE_PRECISION (innermode))
> +        && multiple_p (bitnum, GET_MODE_PRECISION (innermode), &pos)))
>       {
>         class expand_operand ops[3];
>  
> -       create_output_operand (&ops[0], target, innermode);
> +       create_output_operand (&ops[0], target, extract_mode);
>         ops[0].target = 1;
>         create_input_operand (&ops[1], op0, outermode);
>         create_integer_operand (&ops[2], pos);
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/pr112773.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/pr112773.c
> new file mode 100644
> index 00000000000..5f7374b0040
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/partial/pr112773.c
> @@ -0,0 +1,20 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv_zvl256b -mabi=ilp32d -O3" } */
> +
> +long long a;
> +int b, c;
> +int *d;
> +void e(unsigned f) {
> +  for (;; ++c)
> +    if (f) {
> +      a = 0;
> +      for (; a <= 3; a++) {
> +        f = 0;
> +        for (; f <= 0; f++)
> +          if ((long)a)
> +            break;
> +      }
> +      if (b)
> +        *d = f;
> +    }
> +}
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to