[PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-10 Thread Hu, Lin1
I have wrapped the part of the code that handles indirect conversion into a
new function. Its API is modeled on supportable_narrowing/widening_operations.

BRs,
Lin

gcc/ChangeLog:

PR target/107432
* tree-vect-generic.cc
(expand_vector_conversion): Support convert for int -> int,
float -> float and int <-> float.
* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
indirect convert part.
(supportable_indirect_convert_operation): New function.
* tree-vectorizer.h (supportable_indirect_convert_operation):
Define the new function.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-1.c: New test.
* gcc.target/i386/pr107432-2.c: Ditto.
* gcc.target/i386/pr107432-3.c: Ditto.
* gcc.target/i386/pr107432-4.c: Ditto.
* gcc.target/i386/pr107432-5.c: Ditto.
* gcc.target/i386/pr107432-6.c: Ditto.
* gcc.target/i386/pr107432-7.c: Ditto.
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 
 gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +
 gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +
 gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +
 gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 ++
 gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 
 gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 +
 gcc/tree-vect-generic.cc   |  33 ++-
 gcc/tree-vect-stmts.cc | 244 +
 gcc/tree-vectorizer.h  |   9 +
 10 files changed, 1011 insertions(+), 92 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include <x86intrin.h>
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2si);
+}
+
+__m128i mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
+}
+
+__m256i mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
+}
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi);
+}
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi);
+}
+
+__m128i mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
+}
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi);
+}
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4qi);
+}
+
+__v8qi mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
+{
+  return __builtin_convertvector((__v8di)a, __v8qi);
+}
+
+__v2hi mm64_cvtepi32_epi16_builtin_convertvector(__v2si a)
+{
+  return __builtin_convertvector((__v2si)a, __v2hi);
+}
+
+__v4hi mm_cvtepi32_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v4si)a, __v4hi);
+}
+
+__m1

RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-16 Thread Hu, Lin1
Ping this thread.

BRs,
Lin

-Original Message-
From: Hu, Lin1  
Sent: Tuesday, June 11, 2024 2:49 PM
To: gcc-patches@gcc.gnu.org
Cc: Liu, Hongtao ; ubiz...@gmail.com; rguent...@suse.de
Subject: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, 
float -> float and int <-> float.

I wrap a part of code about indirect conversion. The API refers to 
supportable_narrowing/widening_operations.

BRs,
Lin

gcc/ChangeLog:

PR target/107432
* tree-vect-generic.cc
(expand_vector_conversion): Support convert for int -> int,
float -> float and int <-> float.
* tree-vect-stmts.cc (vectorizable_conversion): Wrap the
indirect convert part.
(supportable_indirect_convert_operation): New function.
* tree-vectorizer.h (supportable_indirect_convert_operation):
Define the new function.

gcc/testsuite/ChangeLog:

PR target/107432
* gcc.target/i386/pr107432-1.c: New test.
* gcc.target/i386/pr107432-2.c: Ditto.
* gcc.target/i386/pr107432-3.c: Ditto.
* gcc.target/i386/pr107432-4.c: Ditto.
* gcc.target/i386/pr107432-5.c: Ditto.
* gcc.target/i386/pr107432-6.c: Ditto.
* gcc.target/i386/pr107432-7.c: Ditto.
---
 gcc/testsuite/gcc.target/i386/pr107432-1.c | 234   
gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +  
gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +  
gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +  
gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 ++  
gcc/testsuite/gcc.target/i386/pr107432-6.c | 139   
gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 +
 gcc/tree-vect-generic.cc   |  33 ++-
 gcc/tree-vect-stmts.cc | 244 +
 gcc/tree-vectorizer.h  |   9 +
 10 files changed, 1011 insertions(+), 92 deletions(-)  create mode 100644 
gcc/testsuite/gcc.target/i386/pr107432-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c

diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
b/gcc/testsuite/gcc.target/i386/pr107432-1.c
new file mode 100644
index 000..a4f37447eb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
@@ -0,0 +1,234 @@
+/* { dg-do compile } */
+/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
+/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
+
+#include 
+
+typedef short __v2hi __attribute__ ((__vector_size__ (4)));
+typedef char __v2qi __attribute__ ((__vector_size__ (2)));
+typedef char __v4qi __attribute__ ((__vector_size__ (4)));
+typedef char __v8qi __attribute__ ((__vector_size__ (8)));
+
+typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4))); 
+typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8))); 
+typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2))); 
+typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4))); 
+typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8))); 
+typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
+
+__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a) {
+  return __builtin_convertvector((__v2di)a, __v2si); }
+
+__m128i mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
+{
+  return (__m128i)__builtin_convertvector((__v4di)a, __v4si); }
+
+__m256i mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
+{
+  return (__m256i)__builtin_convertvector((__v8di)a, __v8si); }
+
+__v2hi mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2hi); }
+
+__v4hi mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
+{
+  return __builtin_convertvector((__v4di)a, __v4hi); }
+
+__m128i mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
+{
+  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi); }
+
+__v2qi mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
+{
+  return __builtin_convertvector((__v2di)a, __v2qi); }
+
+__v4qi mm256_cvtepi64_epi8_builtin_convertvector(__m256i

Re: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-18 Thread Richard Biener
On Tue, 11 Jun 2024, Hu, Lin1 wrote:

> I wrap a part of code about indirect conversion. The API refers to 
> supportable_narrowing/widening_operations.

Sorry for the delay - comments inline.

> BRs,
> Lin
> 
> gcc/ChangeLog:
> 
>   PR target/107432
>   * tree-vect-generic.cc
>   (expand_vector_conversion): Support convert for int -> int,
>   float -> float and int <-> float.
>   * tree-vect-stmts.cc (vectorizable_conversion): Wrap the
>   indirect convert part.
>   (supportable_indirect_convert_operation): New function.
>   * tree-vectorizer.h (supportable_indirect_convert_operation):
>   Define the new function.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR target/107432
>   * gcc.target/i386/pr107432-1.c: New test.
>   * gcc.target/i386/pr107432-2.c: Ditto.
>   * gcc.target/i386/pr107432-3.c: Ditto.
>   * gcc.target/i386/pr107432-4.c: Ditto.
>   * gcc.target/i386/pr107432-5.c: Ditto.
>   * gcc.target/i386/pr107432-6.c: Ditto.
>   * gcc.target/i386/pr107432-7.c: Ditto.
> ---
>  gcc/testsuite/gcc.target/i386/pr107432-1.c | 234 
>  gcc/testsuite/gcc.target/i386/pr107432-2.c | 105 +
>  gcc/testsuite/gcc.target/i386/pr107432-3.c |  55 +
>  gcc/testsuite/gcc.target/i386/pr107432-4.c |  56 +
>  gcc/testsuite/gcc.target/i386/pr107432-5.c |  72 ++
>  gcc/testsuite/gcc.target/i386/pr107432-6.c | 139 
>  gcc/testsuite/gcc.target/i386/pr107432-7.c | 156 +
>  gcc/tree-vect-generic.cc   |  33 ++-
>  gcc/tree-vect-stmts.cc | 244 +
>  gcc/tree-vectorizer.h  |   9 +
>  10 files changed, 1011 insertions(+), 92 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-4.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-5.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-6.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr107432-7.c
> 
> diff --git a/gcc/testsuite/gcc.target/i386/pr107432-1.c 
> b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> new file mode 100644
> index 000..a4f37447eb4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr107432-1.c
> @@ -0,0 +1,234 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=x86-64 -mavx512bw -mavx512vl -O3" } */
> +/* { dg-final { scan-assembler-times "vpmovqd" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqw" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovqb" 6 } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdw" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 6 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovdb" 8 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpmovwb" 8 } } */
> +
> +#include 
> +
> +typedef short __v2hi __attribute__ ((__vector_size__ (4)));
> +typedef char __v2qi __attribute__ ((__vector_size__ (2)));
> +typedef char __v4qi __attribute__ ((__vector_size__ (4)));
> +typedef char __v8qi __attribute__ ((__vector_size__ (8)));
> +
> +typedef unsigned short __v2hu __attribute__ ((__vector_size__ (4)));
> +typedef unsigned short __v4hu __attribute__ ((__vector_size__ (8)));
> +typedef unsigned char __v2qu __attribute__ ((__vector_size__ (2)));
> +typedef unsigned char __v4qu __attribute__ ((__vector_size__ (4)));
> +typedef unsigned char __v8qu __attribute__ ((__vector_size__ (8)));
> +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8)));
> +
> +__v2si mm_cvtepi64_epi32_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2di)a, __v2si);
> +}
> +
> +__m128i  mm256_cvtepi64_epi32_builtin_convertvector(__m256i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v4di)a, __v4si);
> +}
> +
> +__m256i  mm512_cvtepi64_epi32_builtin_convertvector(__m512i a)
> +{
> +  return (__m256i)__builtin_convertvector((__v8di)a, __v8si);
> +}
> +
> +__v2hi   mm_cvtepi64_epi16_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2di)a, __v2hi);
> +}
> +
> +__v4hi   mm256_cvtepi64_epi16_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v4di)a, __v4hi);
> +}
> +
> +__m128i  mm512_cvtepi64_epi16_builtin_convertvector(__m512i a)
> +{
> +  return (__m128i)__builtin_convertvector((__v8di)a, __v8hi);
> +}
> +
> +__v2qi   mm_cvtepi64_epi8_builtin_convertvector(__m128i a)
> +{
> +  return __builtin_convertvector((__v2di)a, __v2qi);
> +}
> +
> +__v4qi   mm256_cvtepi64_epi8_builtin_convertvector(__m256i a)
> +{
> +  return __builtin_convertvector((__v4di)a, __v4qi);
> +}
> +
> +__v8qi   mm512_cvtepi64_epi8_builtin_convertvector(__m512i a)
> 

RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-20 Thread Hu, Lin1
> >else if (ret_elt_bits > arg_elt_bits)
> >  modifier = WIDEN;
> >
> > +  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > +{
> > +  g = gimple_build_assign (lhs, code1, arg);
> > +  gsi_replace (gsi, g, false);
> > +  return;
> > +}
> 
> Given the API change I suggest below it might make sense to have
> supportable_indirect_convert_operation do the above and represent it as a
> single-step conversion?
>

OK, if you want supportable_indirect_convert_operation to also do what
supportable_convert_operation does, I'll give it a try. That functionality is
indeed something this function can cover. But it would require more than just
the API change, because supportable_indirect_convert_operation originally
only supported Float -> Int or Int -> Float.
 
>
> > +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > +  int multi_step_cvt = 0;
> > +  vec<tree> interm_types = vNULL;
> > +  if (supportable_indirect_convert_operation (NULL,
> > + code,
> > + ret_type, arg_type,
> > + &code2, &code3,
> > + &multi_step_cvt,
> > + &interm_types, arg))
> > +{
> > +  new_rhs = make_ssa_name (interm_types[0]);
> > +  g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > +  g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > +  gsi_replace (gsi, g, false);
> > +  return;
> > +}
> > +
> >if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> FLOAT_EXPR))
> >  {
> > -  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > -   {
> > - g = gimple_build_assign (lhs, code1, arg);
> > - gsi_replace (gsi, g, false);
> > - return;
> > -   }
> >/* Can't use get_compute_type here, as supportable_convert_operation
> >  doesn't necessarily use an optab and needs two arguments.  */
> >tree vec_compute_type
> > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > 05a169ecb2d..0aa608202ca 100644
> > --- a/gcc/tree-vect-stmts.cc
> > +++ b/gcc/tree-vect-stmts.cc
> > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> >tree scalar_dest;
> >tree op0, op1 = NULL_TREE;
> >loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
> > -  tree_code tc1, tc2;
> > +  tree_code tc1;
> >code_helper code, code1, code2;
> >code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> >tree new_temp;
> > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
> > break;
> >}
> >
> > -  /* For conversions between float and integer types try whether
> > -we can use intermediate signed integer types to support the
> > -conversion.  */
> > -  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > - && (code == FLOAT_EXPR ||
> > - (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > -   {
> > - bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> (lhs_mode);
> > - bool float_expr_p = code == FLOAT_EXPR;
> > - unsigned short target_size;
> > - scalar_mode intermediate_mode;
> > - if (demotion)
> > -   {
> > - intermediate_mode = lhs_mode;
> > - target_size = GET_MODE_SIZE (rhs_mode);
> > -   }
> > - else
> > -   {
> > - target_size = GET_MODE_SIZE (lhs_mode);
> > - if (!int_mode_for_size
> > - (GET_MODE_BITSIZE (rhs_mode), 0).exists
> (&intermediate_mode))
> > -   goto unsupported;
> > -   }
> > - code1 = float_expr_p ? code : NOP_EXPR;
> > - codecvt1 = float_expr_p ? NOP_EXPR : code;
> > - opt_scalar_mode mode_iter;
> > - FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > -   {
> > - intermediate_mode = mode_iter.require ();
> > -
> > - if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > -   break;
> > -
> > - scalar_mode cvt_mode;
> > - if (!int_mode_for_size
> > - (GET_MODE_BITSIZE (intermediate_mode), 0).exists
> (&cvt_mode))
> > -   break;
> > -
> > - cvt_type = build_nonstandard_integer_type
> > -   (GET_MODE_BITSIZE (cvt_mode), 0);
> > -
> > - /* Check if the intermediate type can hold OP0's range.
> > -When converting from float to integer this is not necessary
> > -because values that do not fit the (smaller) target type are
> > -unspecified anyway.  */
> > - if (demotion && float_expr_p)
> > -   {
> > - wide_int op_min_value, op_max_value;
> > - if (!vect_get_range_info (op0, &op_min_value,
> &op_max_value))
> > -   break;
> > -
> > - if (cvt_type == NULL_TREE
> > - || (wi::min_precision (op_max_v

RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-24 Thread Richard Biener
On Thu, 20 Jun 2024, Hu, Lin1 wrote:

> > >else if (ret_elt_bits > arg_elt_bits)
> > >  modifier = WIDEN;
> > >
> > > +  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > > +{
> > > +  g = gimple_build_assign (lhs, code1, arg);
> > > +  gsi_replace (gsi, g, false);
> > > +  return;
> > > +}
> > 
> > Given the API change I suggest below it might make sense to have
> > supportable_indirect_convert_operation do the above and represent it as 
> > single-
> > step conversion?
> >
> 
> OK, if you want to supportable_indirect_convert_operation can do 
> something like supportable_convert_operation, I'll give it a try. This 
> functionality is really the part that this function can cover. But this 
> would require some changes not only the API change, because 
> supportable_indirect_convert_operation originally only supported Float 
> -> Int or Int ->Float.

I think I'd like to see a single API that handles both direct and
(multi-)indirect-level converts operating on vectors that all have
the same number of lanes.

> >
> > > +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > > +  int multi_step_cvt = 0;
> > > +  vec interm_types = vNULL;
> > > +  if (supportable_indirect_convert_operation (NULL,
> > > +   code,
> > > +   ret_type, arg_type,
> > > +   &code2, &code3,
> > > +   &multi_step_cvt,
> > > +   &interm_types, arg))
> > > +{
> > > +  new_rhs = make_ssa_name (interm_types[0]);
> > > +  g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > > +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > > +  g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > > +  gsi_replace (gsi, g, false);
> > > +  return;
> > > +}
> > > +
> > >if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> > FLOAT_EXPR))
> > >  {
> > > -  if (supportable_convert_operation (code, ret_type, arg_type, 
> > > &code1))
> > > - {
> > > -   g = gimple_build_assign (lhs, code1, arg);
> > > -   gsi_replace (gsi, g, false);
> > > -   return;
> > > - }
> > >/* Can't use get_compute_type here, as 
> > > supportable_convert_operation
> > >doesn't necessarily use an optab and needs two arguments.  */
> > >tree vec_compute_type
> > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > > 05a169ecb2d..0aa608202ca 100644
> > > --- a/gcc/tree-vect-stmts.cc
> > > +++ b/gcc/tree-vect-stmts.cc
> > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> > >tree scalar_dest;
> > >tree op0, op1 = NULL_TREE;
> > >loop_vec_info loop_vinfo = dyn_cast  (vinfo);
> > > -  tree_code tc1, tc2;
> > > +  tree_code tc1;
> > >code_helper code, code1, code2;
> > >code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> > >tree new_temp;
> > > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
> > >   break;
> > >}
> > >
> > > -  /* For conversions between float and integer types try whether
> > > -  we can use intermediate signed integer types to support the
> > > -  conversion.  */
> > > -  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > > -   && (code == FLOAT_EXPR ||
> > > -   (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
> > > - {
> > > -   bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE
> > (lhs_mode);
> > > -   bool float_expr_p = code == FLOAT_EXPR;
> > > -   unsigned short target_size;
> > > -   scalar_mode intermediate_mode;
> > > -   if (demotion)
> > > - {
> > > -   intermediate_mode = lhs_mode;
> > > -   target_size = GET_MODE_SIZE (rhs_mode);
> > > - }
> > > -   else
> > > - {
> > > -   target_size = GET_MODE_SIZE (lhs_mode);
> > > -   if (!int_mode_for_size
> > > -   (GET_MODE_BITSIZE (rhs_mode), 0).exists
> > (&intermediate_mode))
> > > - goto unsupported;
> > > - }
> > > -   code1 = float_expr_p ? code : NOP_EXPR;
> > > -   codecvt1 = float_expr_p ? NOP_EXPR : code;
> > > -   opt_scalar_mode mode_iter;
> > > -   FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
> > > - {
> > > -   intermediate_mode = mode_iter.require ();
> > > -
> > > -   if (GET_MODE_SIZE (intermediate_mode) > target_size)
> > > - break;
> > > -
> > > -   scalar_mode cvt_mode;
> > > -   if (!int_mode_for_size
> > > -   (GET_MODE_BITSIZE (intermediate_mode), 0).exists
> > (&cvt_mode))
> > > - break;
> > > -
> > > -   cvt_type = build_nonstandard_integer_type
> > > - (GET_MODE_BITSIZE (cvt_mode), 0);
> > > -
> > > -   /* Check if the intermediate type can hold OP0's range.
> > > -  When converting from float to integer this is not necessary
> > > -  because values that do not fit the (smaller) target type are
> > > -  unspecified anywa

RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-24 Thread Tamar Christina
> -Original Message-
> From: Richard Biener 
> Sent: Monday, June 24, 2024 1:34 PM
> To: Hu, Lin1 
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao ;
> ubiz...@gmail.com
> Subject: RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> 
> int, float
> -> float and int <-> float.
> 
> On Thu, 20 Jun 2024, Hu, Lin1 wrote:
> 
> > > >else if (ret_elt_bits > arg_elt_bits)
> > > >  modifier = WIDEN;
> > > >
> > > > +  if (supportable_convert_operation (code, ret_type, arg_type, &code1))
> > > > +{
> > > > +  g = gimple_build_assign (lhs, code1, arg);
> > > > +  gsi_replace (gsi, g, false);
> > > > +  return;
> > > > +}
> > >
> > > Given the API change I suggest below it might make sense to have
> > > supportable_indirect_convert_operation do the above and represent it as
> single-
> > > step conversion?
> > >
> >
> > OK, if you want to supportable_indirect_convert_operation can do
> > something like supportable_convert_operation, I'll give it a try. This
> > functionality is really the part that this function can cover. But this
> > would require some changes not only the API change, because
> > supportable_indirect_convert_operation originally only supported Float
> > -> Int or Int ->Float.
> 
> I think I'd like to see a single API to handle direct and
> (multi-)indirect-level converts that operate on vectors with all
> the same number of lanes.
> 
> > >
> > > > +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > > > +  int multi_step_cvt = 0;
> > > > +  vec interm_types = vNULL;
> > > > +  if (supportable_indirect_convert_operation (NULL,
> > > > + code,
> > > > + ret_type, arg_type,
> > > > + &code2, &code3,
> > > > + &multi_step_cvt,
> > > > + &interm_types, arg))
> > > > +{
> > > > +  new_rhs = make_ssa_name (interm_types[0]);
> > > > +  g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > > > +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > > > +  g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > > > +  gsi_replace (gsi, g, false);
> > > > +  return;
> > > > +}
> > > > +
> > > >if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> > > FLOAT_EXPR))
> > > >  {
> > > > -  if (supportable_convert_operation (code, ret_type, arg_type, 
> > > > &code1))
> > > > -   {
> > > > - g = gimple_build_assign (lhs, code1, arg);
> > > > - gsi_replace (gsi, g, false);
> > > > - return;
> > > > -   }
> > > >/* Can't use get_compute_type here, as 
> > > > supportable_convert_operation
> > > >  doesn't necessarily use an optab and needs two arguments.  */
> > > >tree vec_compute_type
> > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index
> > > > 05a169ecb2d..0aa608202ca 100644
> > > > --- a/gcc/tree-vect-stmts.cc
> > > > +++ b/gcc/tree-vect-stmts.cc
> > > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> > > >tree scalar_dest;
> > > >tree op0, op1 = NULL_TREE;
> > > >loop_vec_info loop_vinfo = dyn_cast  (vinfo);
> > > > -  tree_code tc1, tc2;
> > > > +  tree_code tc1;
> > > >code_helper code, code1, code2;
> > > >code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
> > > >tree new_temp;
> > > > @@ -5384,92 +5384,17 @@ vectorizable_conversion (vec_info *vinfo,
> > > > break;
> > > >}
> > > >
> > > > -  /* For conversions between float and integer types try whether
> > > > -we can use intermediate signed integer types to support the
> > > > -conversion.  */
> > > > -  if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
> > > > - && (code == FLOAT_EXPR ||
> > > > - (code == FIX_TRUNC_EXPR && !flag_trapping_m

RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> int, float -> float and int <-> float.

2024-06-24 Thread Hu, Lin1
> -Original Message-
> From: Tamar Christina 
> Sent: Monday, June 24, 2024 10:12 PM
> To: Richard Biener ; Hu, Lin1 
> Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao ;
> ubiz...@gmail.com
> Subject: RE: [PATCH 1/3 v3] vect: generate suitable convert insn for int -> 
> int,
> float -> float and int <-> float.
> 
> > -Original Message-
> > From: Richard Biener 
> > Sent: Monday, June 24, 2024 1:34 PM
> > To: Hu, Lin1 
> > Cc: gcc-patches@gcc.gnu.org; Liu, Hongtao ;
> > ubiz...@gmail.com
> > Subject: RE: [PATCH 1/3 v3] vect: generate suitable convert insn for
> > int -> int, float
> > -> float and int <-> float.
> >
> > On Thu, 20 Jun 2024, Hu, Lin1 wrote:
> >
> > > > >else if (ret_elt_bits > arg_elt_bits)
> > > > >  modifier = WIDEN;
> > > > >
> > > > > +  if (supportable_convert_operation (code, ret_type, arg_type, 
> > > > > &code1))
> > > > > +{
> > > > > +  g = gimple_build_assign (lhs, code1, arg);
> > > > > +  gsi_replace (gsi, g, false);
> > > > > +  return;
> > > > > +}
> > > >
> > > > Given the API change I suggest below it might make sense to have
> > > > supportable_indirect_convert_operation do the above and represent
> > > > it as
> > single-
> > > > step conversion?
> > > >
> > >
> > > OK, if you want to supportable_indirect_convert_operation can do
> > > something like supportable_convert_operation, I'll give it a try.
> > > This functionality is really the part that this function can cover.
> > > But this would require some changes not only the API change, because
> > > supportable_indirect_convert_operation originally only supported
> > > Float
> > > -> Int or Int ->Float.
> >
> > I think I'd like to see a single API to handle direct and
> > (multi-)indirect-level converts that operate on vectors with all the
> > same number of lanes.
> >
> > > >
> > > > > +  code_helper code2 = ERROR_MARK, code3 = ERROR_MARK;
> > > > > +  int multi_step_cvt = 0;
> > > > > +  vec interm_types = vNULL;
> > > > > +  if (supportable_indirect_convert_operation (NULL,
> > > > > +   code,
> > > > > +   ret_type, arg_type,
> > > > > +   &code2, &code3,
> > > > > +   &multi_step_cvt,
> > > > > +   &interm_types, arg))
> > > > > +{
> > > > > +  new_rhs = make_ssa_name (interm_types[0]);
> > > > > +  g = gimple_build_assign (new_rhs, (tree_code) code3, arg);
> > > > > +  gsi_insert_before (gsi, g, GSI_SAME_STMT);
> > > > > +  g = gimple_build_assign (lhs, (tree_code) code2, new_rhs);
> > > > > +  gsi_replace (gsi, g, false);
> > > > > +  return;
> > > > > +}
> > > > > +
> > > > >if (modifier == NONE && (code == FIX_TRUNC_EXPR || code ==
> > > > FLOAT_EXPR))
> > > > >  {
> > > > > -  if (supportable_convert_operation (code, ret_type, arg_type,
> &code1))
> > > > > - {
> > > > > -   g = gimple_build_assign (lhs, code1, arg);
> > > > > -   gsi_replace (gsi, g, false);
> > > > > -   return;
> > > > > - }
> > > > >/* Can't use get_compute_type here, as
> supportable_convert_operation
> > > > >doesn't necessarily use an optab and needs two arguments.  */
> > > > >tree vec_compute_type
> > > > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> > > > > index 05a169ecb2d..0aa608202ca 100644
> > > > > --- a/gcc/tree-vect-stmts.cc
> > > > > +++ b/gcc/tree-vect-stmts.cc
> > > > > @@ -5175,7 +5175,7 @@ vectorizable_conversion (vec_info *vinfo,
> > > > >tree scalar_dest;
> > > > >tree op0, op1 = NULL_TREE;
> > > > >loop_vec_info loop_vinfo = dyn_cast  (vinfo);
> > > > > -  tree_code tc1, tc2;
> > > > > +  tree_code tc1;
> > > > >code_helper code, code1, c