Christophe Lyon <christophe.lyon....@gmail.com> writes:
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
> index 9c645722230..dd537ec1679 100644
> --- a/gcc/config/arm/arm-builtins.c
> +++ b/gcc/config/arm/arm-builtins.c
> @@ -1553,11 +1553,25 @@ arm_init_simd_builtin_types (void)
>        tree eltype = arm_simd_types[i].eltype;
>        machine_mode mode = arm_simd_types[i].mode;
>  
> -      if (eltype == NULL)
> +      if (eltype == NULL
> +       /* VECTOR_BOOL is not supported unless MVE is activated, this would
> +          make build_truth_vector_type_for_mode crash.  */
> +       && ((GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
> +           ||!TARGET_HAVE_MVE))

For the record: this kind of thing wouldn't be OK in aarch64,
since there we should allow a target to be selected later.
But I agree that here it's valid, since TARGET_HAVE_MVE already
decides whether arm_neon.h or arm_mve.h builtins are registered.

Formatting nit though: missing space after “||”.

>       continue;
>        if (arm_simd_types[i].itype == NULL)
>       {
> -       tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
> +       tree type;
> +       if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
> +         {
> +           /* Handle MVE predicates: they are internally stored as 16 bits,
> +              but are used as vectors of 1, 2 or 4-bit elements.  */
> +           type = build_truth_vector_type_for_mode (GET_MODE_NUNITS (mode), mode);

Formatting nit: line too long.

OK with those changes, thanks.

Richard

> +           eltype = TREE_TYPE (type);
> +         }
> +       else
> +         type = build_vector_type (eltype, GET_MODE_NUNITS (mode));
> +
>         type = build_distinct_type_copy (type);
>         SET_TYPE_STRUCTURAL_EQUALITY (type);
>  
> @@ -1695,6 +1709,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d,
>        if (qualifiers & qualifier_map_mode)
>       op_mode = d->mode;
>  
> +      /* MVE Predicates use HImode as mandated by the ABI: pred16_t is unsigned
> +      short.  */
> +      if (qualifiers & qualifier_predicate)
> +     op_mode = HImode;
> +
>        /* For pointers, we want a pointer to the basic type
>        of the vector.  */
>        if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
> @@ -2939,6 +2958,11 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode,
>           case ARG_BUILTIN_COPY_TO_REG:
>             if (POINTER_TYPE_P (TREE_TYPE (arg[argc])))
>               op[argc] = convert_memory_address (Pmode, op[argc]);
> +
> +           /* MVE uses mve_pred16_t (aka HImode) for vectors of predicates.  */
> +           if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL)
> +             op[argc] = gen_lowpart (mode[argc], op[argc]);
> +
>             /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */
>             if (!(*insn_data[icode].operand[opno].predicate)
>                 (op[argc], mode[argc]))
> @@ -3144,6 +3168,13 @@ constant_arg:
>    else
>      emit_insn (insn);
>  
> +  if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL)
> +    {
> +      rtx HItarget = gen_reg_rtx (HImode);
> +      emit_move_insn (HItarget, gen_lowpart (HImode, target));
> +      return HItarget;
> +    }
> +
>    return target;
>  }
>  
> diff --git a/gcc/config/arm/arm-builtins.h b/gcc/config/arm/arm-builtins.h
> index e5130d6d286..a8ef8aef82d 100644
> --- a/gcc/config/arm/arm-builtins.h
> +++ b/gcc/config/arm/arm-builtins.h
> @@ -84,7 +84,9 @@ enum arm_type_qualifiers
>    qualifier_lane_pair_index = 0x1000,
>    /* Lane indices selected in quadtuplets - must be within range of previous
>       argument = a vector.  */
> -  qualifier_lane_quadtup_index = 0x2000
> +  qualifier_lane_quadtup_index = 0x2000,
> +  /* MVE vector predicates.  */
> +  qualifier_predicate = 0x4000
>  };
>  
>  struct arm_simd_type_info
> diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def
> index de689c8b45e..9ed0cd042c5 100644
> --- a/gcc/config/arm/arm-modes.def
> +++ b/gcc/config/arm/arm-modes.def
> @@ -84,6 +84,14 @@ VECTOR_MODE (FLOAT, BF, 2);   /*                 V2BF.  */
>  VECTOR_MODE (FLOAT, BF, 4);   /*              V4BF.  */
>  VECTOR_MODE (FLOAT, BF, 8);   /*              V8BF.  */
>  
> +/* Predicates for MVE.  */
> +BOOL_MODE (B2I, 2, 1);
> +BOOL_MODE (B4I, 4, 1);
> +
> +VECTOR_BOOL_MODE (V16BI, 16, BI, 2);
> +VECTOR_BOOL_MODE (V8BI, 8, B2I, 2);
> +VECTOR_BOOL_MODE (V4BI, 4, B4I, 2);
> +
>  /* Fraction and accumulator vector modes.  */
>  VECTOR_MODES (FRACT, 4);      /* V4QQ  V2HQ */
>  VECTOR_MODES (UFRACT, 4);     /* V4UQQ V2UHQ */
> diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def
> index 6ba6f211531..d1d6416dad1 100644
> --- a/gcc/config/arm/arm-simd-builtin-types.def
> +++ b/gcc/config/arm/arm-simd-builtin-types.def
> @@ -51,3 +51,7 @@
>    ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
>    ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
>    ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
> +
> +  ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 16)
> +  ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 15)
> +  ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 15)
> diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
> index feeee16d320..5bf7d37cfa6 100644
> --- a/gcc/emit-rtl.c
> +++ b/gcc/emit-rtl.c
> @@ -6239,9 +6239,22 @@ init_emit_once (void)
>  
>    /* For BImode, 1 and -1 are unsigned and signed interpretations
>       of the same value.  */
> -  const_tiny_rtx[0][(int) BImode] = const0_rtx;
> -  const_tiny_rtx[1][(int) BImode] = const_true_rtx;
> -  const_tiny_rtx[3][(int) BImode] = const_true_rtx;
> +  for (mode = MIN_MODE_BOOL;
> +       mode <= MAX_MODE_BOOL;
> +       mode = (machine_mode)((int)(mode) + 1))
> +    {
> +      const_tiny_rtx[0][(int) mode] = const0_rtx;
> +      if (mode == BImode)
> +     {
> +       const_tiny_rtx[1][(int) mode] = const_true_rtx;
> +       const_tiny_rtx[3][(int) mode] = const_true_rtx;
> +     }
> +      else
> +     {
> +       const_tiny_rtx[1][(int) mode] = const1_rtx;
> +       const_tiny_rtx[3][(int) mode] = constm1_rtx;
> +     }
> +    }
>  
>    for (mode = MIN_MODE_PARTIAL_INT;
>         mode <= MAX_MODE_PARTIAL_INT;
> @@ -6260,13 +6273,16 @@ init_emit_once (void)
>        const_tiny_rtx[0][(int) mode] = gen_rtx_CONCAT (mode, inner, inner);
>      }
>  
> -  /* As for BImode, "all 1" and "all -1" are unsigned and signed
> -     interpretations of the same value.  */
>    FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_BOOL)
>      {
>        const_tiny_rtx[0][(int) mode] = gen_const_vector (mode, 0);
>        const_tiny_rtx[3][(int) mode] = gen_const_vector (mode, 3);
> -      const_tiny_rtx[1][(int) mode] = const_tiny_rtx[3][(int) mode];
> +      if (GET_MODE_INNER (mode) == BImode)
> +     /* As for BImode, "all 1" and "all -1" are unsigned and signed
> +        interpretations of the same value.  */
> +     const_tiny_rtx[1][(int) mode] = const_tiny_rtx[3][(int) mode];
> +      else
> +     const_tiny_rtx[1][(int) mode] = gen_const_vector (mode, 1);
>      }
>  
>    FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
> diff --git a/gcc/genmodes.c b/gcc/genmodes.c
> index 6001b854547..5881abd846c 100644
> --- a/gcc/genmodes.c
> +++ b/gcc/genmodes.c
> @@ -78,6 +78,7 @@ struct mode_data
>    bool need_bytesize_adj;    /* true if this mode needs dynamic size
>                                  adjustment */
>    unsigned int int_n;                /* If nonzero, then __int<INT_N> will be defined */
> +  bool boolean;
>  };
>  
>  static struct mode_data *modes[MAX_MODE_CLASS];
> @@ -88,7 +89,8 @@ static const struct mode_data blank_mode = {
>    0, "<unknown>", MAX_MODE_CLASS,
>    0, -1U, -1U, -1U, -1U,
>    0, 0, 0, 0, 0, 0,
> -  "<unknown>", 0, 0, 0, 0, false, false, 0
> +  "<unknown>", 0, 0, 0, 0, false, false, 0,
> +  false
>  };
>  
>  static htab_t modes_by_name;
> @@ -456,7 +458,7 @@ make_complex_modes (enum mode_class cl,
>        size_t m_len;
>  
>        /* Skip BImode.  FIXME: BImode probably shouldn't be MODE_INT.  */
> -      if (m->precision == 1)
> +      if (m->boolean)
>       continue;
>  
>        m_len = strlen (m->name);
> @@ -528,7 +530,7 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
>        not be necessary.  */
>        if (cl == MODE_FLOAT && m->bytesize == 1)
>       continue;
> -      if (cl == MODE_INT && m->precision == 1)
> +      if (m->boolean)
>       continue;
>  
>        if ((size_t) snprintf (buf, sizeof buf, "%s%u%s", prefix,
> @@ -548,17 +550,18 @@ make_vector_modes (enum mode_class cl, const char *prefix, unsigned int width,
>  
>  /* Create a vector of booleans called NAME with COUNT elements and
>     BYTESIZE bytes in total.  */
> -#define VECTOR_BOOL_MODE(NAME, COUNT, BYTESIZE) \
> -  make_vector_bool_mode (#NAME, COUNT, BYTESIZE, __FILE__, __LINE__)
> +#define VECTOR_BOOL_MODE(NAME, COUNT, COMPONENT, BYTESIZE)           \
> +  make_vector_bool_mode (#NAME, COUNT, #COMPONENT, BYTESIZE,         \
> +                      __FILE__, __LINE__)
>  static void ATTRIBUTE_UNUSED
>  make_vector_bool_mode (const char *name, unsigned int count,
> -                    unsigned int bytesize, const char *file,
> -                    unsigned int line)
> +                    const char *component, unsigned int bytesize,
> +                    const char *file, unsigned int line)
>  {
> -  struct mode_data *m = find_mode ("BI");
> +  struct mode_data *m = find_mode (component);
>    if (!m)
>      {
> -      error ("%s:%d: no mode \"BI\"", file, line);
> +      error ("%s:%d: no mode \"%s\"", file, line, component);
>        return;
>      }
>  
> @@ -596,6 +599,20 @@ make_int_mode (const char *name,
>    m->precision = precision;
>  }
>  
> +#define BOOL_MODE(N, B, Y) \
> +  make_bool_mode (#N, B, Y, __FILE__, __LINE__)
> +
> +static void
> +make_bool_mode (const char *name,
> +             unsigned int precision, unsigned int bytesize,
> +             const char *file, unsigned int line)
> +{
> +  struct mode_data *m = new_mode (MODE_INT, name, file, line);
> +  m->bytesize = bytesize;
> +  m->precision = precision;
> +  m->boolean = true;
> +}
> +
>  #define OPAQUE_MODE(N, B)                    \
>    make_opaque_mode (#N, -1U, B, __FILE__, __LINE__)
>  
> @@ -1298,9 +1315,21 @@ enum machine_mode\n{");
>        /* Don't use BImode for MIN_MODE_INT, since otherwise the middle
>        end will try to use it for bitfields in structures and the
>        like, which we do not want.  Only the target md file should
> -      generate BImode widgets.  */
> -      if (first && first->precision == 1 && c == MODE_INT)
> -     first = first->next;
> +      generate BImode widgets.  Since some targets such as ARM/MVE
> +      define boolean modes with multiple bits, handle those too.  */
> +      if (first && first->boolean)
> +     {
> +       struct mode_data *last_bool = first;
> +       printf ("  MIN_MODE_BOOL = E_%smode,\n", first->name);
> +
> +       while (first && first->boolean)
> +         {
> +           last_bool = first;
> +           first = first->next;
> +         }
> +
> +       printf ("  MAX_MODE_BOOL = E_%smode,\n\n", last_bool->name);
> +     }
>  
>        if (first && last)
>       printf ("  MIN_%s = E_%smode,\n  MAX_%s = E_%smode,\n\n",
> @@ -1679,15 +1708,15 @@ emit_class_narrowest_mode (void)
>    print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
>  
>    for (c = 0; c < MAX_MODE_CLASS; c++)
> -    /* Bleah, all this to get the comment right for MIN_MODE_INT.  */
> -    tagged_printf ("MIN_%s", mode_class_names[c],
> -                modes[c]
> -                ? ((c != MODE_INT || modes[c]->precision != 1)
> -                   ? modes[c]->name
> -                   : (modes[c]->next
> -                      ? modes[c]->next->name
> -                      : void_mode->name))
> -                : void_mode->name);
> +    {
> +      /* Bleah, all this to get the comment right for MIN_MODE_INT.  */
> +      struct mode_data *m = modes[c];
> +      while (m && m->boolean)
> +     m = m->next;
> +      const char *comment_name = (m ? m : void_mode)->name;
> +
> +      tagged_printf ("MIN_%s", mode_class_names[c], comment_name);
> +    }
>  
>    print_closer ();
>  }
> diff --git a/gcc/machmode.def b/gcc/machmode.def
> index 866a2082d01..533cf6ab4b2 100644
> --- a/gcc/machmode.def
> +++ b/gcc/machmode.def
> @@ -146,12 +146,13 @@ along with GCC; see the file COPYING3.  If not see
>       Like VECTOR_MODES, but start the mode names with PREFIX instead
>       of the usual "V".
>  
> -     VECTOR_BOOL_MODE (NAME, COUNT, BYTESIZE)
> +     VECTOR_BOOL_MODE (NAME, COUNT, COMPONENT, BYTESIZE)
>          Create a vector mode called NAME that contains COUNT boolean
>          elements and occupies BYTESIZE bytes in total.  Each boolean
> -        element occupies (COUNT * BITS_PER_UNIT) / BYTESIZE bits, with
> -        the element at index 0 occupying the lsb of the first byte in
> -        memory.  Only the lowest bit of each element is significant.
> +        element is of COMPONENT type and occupies (COUNT * BITS_PER_UNIT) /
> +        BYTESIZE bits, with the element at index 0 occupying the lsb of the
> +        first byte in memory.  Only the lowest bit of each element is
> +        significant.
>  
>       OPAQUE_MODE (NAME, BYTESIZE)
>          Create an opaque mode called NAME that is BYTESIZE bytes wide.
> @@ -196,7 +197,7 @@ RANDOM_MODE (VOID);
>  RANDOM_MODE (BLK);
>  
>  /* Single bit mode used for booleans.  */
> -FRACTIONAL_INT_MODE (BI, 1, 1);
> +BOOL_MODE (BI, 1, 1);
>  
>  /* Basic integer modes.  We go up to TI in generic code (128 bits).
>     TImode is needed here because the some front ends now genericly
> diff --git a/gcc/rtx-vector-builder.c b/gcc/rtx-vector-builder.c
> index e36aba010a0..55ffe0d5a76 100644
> --- a/gcc/rtx-vector-builder.c
> +++ b/gcc/rtx-vector-builder.c
> @@ -90,8 +90,10 @@ rtx_vector_builder::find_cached_value ()
>  
>    if (GET_MODE_CLASS (m_mode) == MODE_VECTOR_BOOL)
>      {
> -      if (elt == const1_rtx || elt == constm1_rtx)
> +      if (elt == const1_rtx)
>       return CONST1_RTX (m_mode);
> +      else if (elt == constm1_rtx)
> +     return CONSTM1_RTX (m_mode);
>        else if (elt == const0_rtx)
>       return CONST0_RTX (m_mode);
>        else
> diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
> index c36c825f958..532537ea48d 100644
> --- a/gcc/simplify-rtx.c
> +++ b/gcc/simplify-rtx.c
> @@ -6876,12 +6876,13 @@ native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
>         /* This is the only case in which elements can be smaller than
>            a byte.  */
>         gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
> +       auto mask = GET_MODE_MASK (GET_MODE_INNER (mode));
>         for (unsigned int i = 0; i < num_bytes; ++i)
>           {
>             target_unit value = 0;
>             for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
>               {
> -               value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
> +               value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & mask) << j;
>                 elt += 1;
>               }
>             bytes.quick_push (value);
> @@ -7025,9 +7026,8 @@ native_decode_vector_rtx (machine_mode mode, const vec<target_unit> &bytes,
>         unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
>         unsigned int byte_index = bit_index / BITS_PER_UNIT;
>         unsigned int lsb = bit_index % BITS_PER_UNIT;
> -       builder.quick_push (bytes[byte_index] & (1 << lsb)
> -                           ? CONST1_RTX (BImode)
> -                           : CONST0_RTX (BImode));
> +       unsigned int value = bytes[byte_index] >> lsb;
> +       builder.quick_push (gen_int_mode (value, GET_MODE_INNER (mode)));
>       }
>      }
>    else
> @@ -7994,17 +7994,23 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
>                                                   duplicate, last_par));
>  
>        /* Test a scalar subreg of a VEC_MERGE of a VEC_DUPLICATE.  */
> -      rtx vector_reg = make_test_reg (mode);
> -      for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
> +      /* Skip this test for vectors of booleans, because offset is in bytes,
> +      while vec_merge indices are in elements (usually bits).  */
> +      if (GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL)
>       {
> -       if (i >= HOST_BITS_PER_WIDE_INT)
> -         break;
> -       rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
> -       rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
> -       poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
> -       ASSERT_RTX_EQ (scalar_reg,
> -                      simplify_gen_subreg (inner_mode, vm,
> -                                           mode, offset));
> +       rtx vector_reg = make_test_reg (mode);
> +       for (unsigned HOST_WIDE_INT i = 0; i < const_nunits; i++)
> +         {
> +           if (i >= HOST_BITS_PER_WIDE_INT)
> +             break;
> +           rtx mask = GEN_INT ((HOST_WIDE_INT_1U << i) | (i + 1));
> +           rtx vm = gen_rtx_VEC_MERGE (mode, duplicate, vector_reg, mask);
> +           poly_uint64 offset = i * GET_MODE_SIZE (inner_mode);
> +
> +           ASSERT_RTX_EQ (scalar_reg,
> +                          simplify_gen_subreg (inner_mode, vm,
> +                                               mode, offset));
> +         }
>       }
>      }
>  
> diff --git a/gcc/varasm.c b/gcc/varasm.c
> index 76574be191f..5f59b6ace15 100644
> --- a/gcc/varasm.c
> +++ b/gcc/varasm.c
> @@ -4085,6 +4085,7 @@ output_constant_pool_2 (fixed_size_mode mode, rtx x, unsigned int align)
>       unsigned int elt_bits = GET_MODE_BITSIZE (mode) / nelts;
>       unsigned int int_bits = MAX (elt_bits, BITS_PER_UNIT);
>       scalar_int_mode int_mode = int_mode_for_size (int_bits, 0).require ();
> +     unsigned int mask = GET_MODE_MASK (GET_MODE_INNER (mode));
>  
>       /* Build the constant up one integer at a time.  */
>       unsigned int elts_per_int = int_bits / elt_bits;
> @@ -4093,8 +4094,10 @@ output_constant_pool_2 (fixed_size_mode mode, rtx x, unsigned int align)
>           unsigned HOST_WIDE_INT value = 0;
>           unsigned int limit = MIN (nelts - i, elts_per_int);
>           for (unsigned int j = 0; j < limit; ++j)
> -           if (INTVAL (CONST_VECTOR_ELT (x, i + j)) != 0)
> -             value |= 1 << (j * elt_bits);
> +         {
> +           auto elt = INTVAL (CONST_VECTOR_ELT (x, i + j));
> +           value |= (elt & mask) << (j * elt_bits);
> +         }
>           output_constant_pool_2 (int_mode, gen_int_mode (value, int_mode),
>                                   i != 0 ? MIN (align, int_bits) : align);
>         }

Reply via email to