From: Pan Li <pan2...@intel.com>

Update in v2:

* Fix one ICE of type assertion.
* Adjust some test cases for aarch64 sve and riscv vector.

Original log:

The vectoriable_call has one restriction of the size of data type.
Aka DF to DI is allowed but SF to DI isn't. You may see below message
when try to vectorize function call like lrintf.

void
test_lrintf (long *out, float *in, unsigned count)
{
  for (unsigned i = 0; i < count; i++)
    out[i] = __builtin_lrintf (in[i]);
}

lrintf.c:5:26: missed: couldn't vectorize loop
lrintf.c:5:26: missed: not vectorized: unsupported data-type

Then the standard name pattern like lrintmn2 cannot work for different
data type size like SF => DI. This patch would like to remove this data
type size check and unblock the standard name like lrintmn2.

The below test are passed for this patch.

* The x86 bootstrap and regression test.
* The aarch64 regression test.
* The risc-v regression tests.

gcc/ChangeLog:

        * internal-fn.cc (expand_fn_using_insn): Add vector int assertion.
        * tree-vect-stmts.cc (vectorizable_call): Remove size check.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/sve/clrsb_1.c: Adjust checker.
        * gcc.target/aarch64/sve/clz_1.c: Ditto.
        * gcc.target/aarch64/sve/popcount_1.c: Ditto.
        * gcc.target/riscv/rvv/autovec/unop/popcount.c: Ditto.

Signed-off-by: Pan Li <pan2...@intel.com>
---
 gcc/internal-fn.cc                                  |  3 ++-
 gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c      |  3 +--
 gcc/testsuite/gcc.target/aarch64/sve/clz_1.c        |  3 +--
 gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c   |  3 +--
 .../gcc.target/riscv/rvv/autovec/unop/popcount.c    |  2 +-
 gcc/tree-vect-stmts.cc                              | 13 -------------
 6 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index 61d5a9e4772..17c0f4c3805 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -281,7 +281,8 @@ expand_fn_using_insn (gcall *stmt, insn_code icode, 
unsigned int noutputs,
        emit_move_insn (lhs_rtx, ops[0].value);
       else
        {
-         gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs)));
+         gcc_checking_assert (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+                              || VECTOR_INTEGER_TYPE_P (TREE_TYPE (lhs)));
          convert_move (lhs_rtx, ops[0].value, 0);
        }
     }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
index bdc9856faaf..940d08bbc7b 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clrsb_1.c
@@ -18,5 +18,4 @@ clrsb_64 (unsigned int *restrict dst, uint64_t *restrict src, 
int size)
 }
 
 /* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcls\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
index 0c7a4e6d768..58b8ff406d2 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/clz_1.c
@@ -18,5 +18,4 @@ clz_64 (unsigned int *restrict dst, uint64_t *restrict src, 
int size)
 }
 
 /* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tclz\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
index dfb6f4ac7a5..0eba898307c 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/popcount_1.c
@@ -18,5 +18,4 @@ popcount_64 (unsigned int *restrict dst, uint64_t *restrict 
src, int size)
 }
 
 /* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.s, p[0-7]/m, 
z[0-9]+\.s\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d\n} 2 } } */
-/* { dg-final { scan-assembler-times {\tuzp1\tz[0-9]+\.s, z[0-9]+\.s, 
z[0-9]+\.s\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tcnt\tz[0-9]+\.d, p[0-7]/m, 
z[0-9]+\.d\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount.c
index 585a522aa81..e6e3c70f927 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/popcount.c
@@ -1461,4 +1461,4 @@ main ()
   RUN_ALL ()
 }
 
-/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 229 "vect" } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 384 "vect" } } */
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index a9200767f67..fa4ca0634e8 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -3361,19 +3361,6 @@ vectorizable_call (vec_info *vinfo,
 
       return false;
     }
-  /* FORNOW: we don't yet support mixtures of vector sizes for calls,
-     just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
-     are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
-     by a pack of the two vectors into an SI vector.  We would need
-     separate code to handle direct VnDI->VnSI IFN_CTZs.  */
-  if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
-    {
-      if (dump_enabled_p ())
-       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                        "mismatched vector sizes %T and %T\n",
-                        vectype_in, vectype_out);
-      return false;
-    }
 
   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
-- 
2.34.1

Reply via email to