On 22/11/2021 11:41, Richard Biener wrote:
>> On 18/11/2021 11:05, Richard Biener wrote:
>> This is a good shout and made me think about something I hadn't before...
>> I thought I could handle the vector forms later, but the problem is that if
>> I add support for the scalar form now, it will stop the vectorizer.  It
>> seems vectorizable_call expects all arguments to have the same type, which
>> doesn't work with the work-around of passing the integer type as an extra
>> operand.
>
> We already special case some IFNs there (masked load/store and gather)
> to ignore some args, so that would just add to this set.
>
> Richard.

Hi,

I have reworked the patch to add support for the new IFN to the vectorizer.  I initially tried to make vectorizable_call and vectorizable_internal_function handle IFNs with differently-typed inputs more generically, using the information in the <IFN>_direct structs about which operands to take the modes from.  Unfortunately that wasn't straightforward, because vectorizable_call assumes all operands have the same type and uses the type of the DEF_STMT_INFO of the non-constant operands (either the output operand or the non-constant inputs) to determine the type of constants.  I assume there is a reason why we use the DEF_STMT_INFO rather than always calling get_vectype_for_scalar_type on the argument types.  That is why I ended up with this half-way mix of the two approaches, which still leaves room to add more IFNs that don't take inputs of the same type, at the cost of a bit of special casing similar to that for IFN_FTRUNC_INT and the masking ones.
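To make that concrete, here is a minimal illustration (mine, not part of the patch) of the scalar transformation the new match.pd rule performs.  Compiled with -O2 -march=armv8.5-a, the float -> int -> float round trip below should be rewritten into a single .FTRUNC_INT call whose dummy second argument carries the maximum value of the intermediate signed integer type, and the aarch64 backend then expands that to a single frint32z:

  float
  scalar_example (float x)
  {
    int y = x;          /* fix_trunc to a 32-bit signed int */
    return (float) y;   /* (float)(int)x -> .FTRUNC_INT (x, 2147483647) */
  }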

Bootstrapped on aarch64-none-linux.

OK for trunk?

gcc/ChangeLog:

        * config/aarch64/aarch64.md (ftrunc<mode><frintnz_mode>2): New pattern.
        * config/aarch64/iterators.md (FRINTNZ): New iterator.
        (frintnz_mode): New int attribute.
        (VSFDF): Make iterator conditional.
        * internal-fn.def (FTRUNC_INT): New IFN.
        * internal-fn.c (ftrunc_int_direct): New define.
        (expand_ftrunc_int_optab_fn): New custom expander.
        (direct_ftrunc_int_optab_supported_p): New supported_p.
        * match.pd: Add to the existing TRUNC pattern match.
        * optabs.def (ftrunc_int): New entry.
        * stor-layout.h (element_precision): Moved from here...
        * tree.h (element_precision): ... to here.
        (element_type): New declaration.
        * tree.c (element_type): New function.
        (element_precision): Changed to use element_type.
        * tree-vect-stmts.c (vectorizable_internal_function): Add support for
        IFNs with different input types.
        (vectorizable_call): Teach to handle IFN_FTRUNC_INT.
        * doc/md.texi: New entry for ftrunc pattern name.
        * doc/sourcebuild.texi (aarch64_frintnzx_ok): New target.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/merge_trunc1.c: Skip if the frintNz
        instructions are available.
        * lib/target-supports.exp
        (check_effective_target_aarch64_frintnzx_ok): New
        aarch64_frintnzx_ok effective target.
        * gcc.target/aarch64/frintnz.c: New test.
        * gcc.target/aarch64/frintnz_vec.c: New test.
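To illustrate the vector side, a reduced sketch along the lines of the frintnz_vec.c test below (again just an illustration): with -O3 -march=armv8.5-a, the truncation round trip in the loop body should now vectorize to frint32z on .4s vectors rather than a separate fcvtzs/scvtf pair:

  void
  trunc_loop (float *__restrict x, float *__restrict y, int n)
  {
    for (int i = 0; i < n; i++)
      {
        int x_i = x[i];        /* truncating float -> int conversion */
        y[i] = (float) x_i;    /* expected to become frint32z v0.4s, v0.4s */
      }
  }
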
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4035e061706793849c68ae09bcb2e4b9580ab7b6..c5c60e7a810e22b0ea9ed6bf056ddd6431d60269 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7345,12 +7345,18 @@ (define_insn "despeculate_simpleti"
    (set_attr "speculation_barrier" "true")]
 )
 
+(define_expand "ftrunc<mode><frintnz_mode>2"
+  [(set (match_operand:VSFDF 0 "register_operand" "=w")
+        (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
+                     FRINTNZ))]
+  "TARGET_FRINT"
+)
+
 (define_insn "aarch64_<frintnzs_op><mode>"
   [(set (match_operand:VSFDF 0 "register_operand" "=w")
        (unspec:VSFDF [(match_operand:VSFDF 1 "register_operand" "w")]
                      FRINTNZX))]
-  "TARGET_FRINT && TARGET_FLOAT
-   && !(VECTOR_MODE_P (<MODE>mode) && !TARGET_SIMD)"
+  "TARGET_FRINT"
   "<frintnzs_op>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
   [(set_attr "type" "f_rint<stype>")]
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index bdc8ba3576cf2c9b4ae96b45a382234e4e25b13f..51f00344b02d0d1d4adf97463f6a46f9fd0fb43f 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -160,7 +160,11 @@ (define_mode_iterator VHSDF_HSDF [(V4HF "TARGET_SIMD_F16INST")
                                  SF DF])
 
;; Scalar and vector modes for SF, DF.
-(define_mode_iterator VSFDF [V2SF V4SF V2DF DF SF])
+(define_mode_iterator VSFDF [(V2SF "TARGET_SIMD")
+                            (V4SF "TARGET_SIMD")
+                            (V2DF "TARGET_SIMD")
+                            (DF "TARGET_FLOAT")
+                            (SF "TARGET_FLOAT")])
 
 ;; Advanced SIMD single Float modes.
 (define_mode_iterator VDQSF [V2SF V4SF])
@@ -3067,6 +3071,8 @@ (define_int_iterator FCMLA [UNSPEC_FCMLA
 (define_int_iterator FRINTNZX [UNSPEC_FRINT32Z UNSPEC_FRINT32X
                               UNSPEC_FRINT64Z UNSPEC_FRINT64X])
 
+(define_int_iterator FRINTNZ [UNSPEC_FRINT32Z UNSPEC_FRINT64Z])
+
 (define_int_iterator SVE_BRK_UNARY [UNSPEC_BRKA UNSPEC_BRKB])
 
 (define_int_iterator SVE_BRK_BINARY [UNSPEC_BRKN UNSPEC_BRKPA UNSPEC_BRKPB])
@@ -3482,6 +3488,8 @@ (define_int_attr f16mac1 [(UNSPEC_FMLAL "a") (UNSPEC_FMLSL "s")
(define_int_attr frintnzs_op [(UNSPEC_FRINT32Z "frint32z") (UNSPEC_FRINT32X "frint32x")
                             (UNSPEC_FRINT64Z "frint64z") (UNSPEC_FRINT64X "frint64x")])
 
+(define_int_attr frintnz_mode [(UNSPEC_FRINT32Z "si") (UNSPEC_FRINT64Z "di")])
+
 ;; The condition associated with an UNSPEC_COND_<xx>.
 (define_int_attr cmp_op [(UNSPEC_COND_CMPEQ_WIDE "eq")
                         (UNSPEC_COND_CMPGE_WIDE "ge")
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 41f1850bf6e95005647ca97a495a97d7e184d137..d50d09b0ae60d98537b9aece4396a490f33f174c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6175,6 +6175,15 @@ operands; otherwise, it may not.
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{ftrunc@var{m}@var{n}2} instruction pattern
+@item @samp{ftrunc@var{m}@var{n}2}
+Truncate operand 1 towards zero to a signed integer representable in mode
+@var{n}, and store the result in operand 0.  Both operands have mode @var{m},
+which is a scalar or vector floating-point mode.  An exception must be raised
+if operand 1 does not fit in an @var{n}-mode signed integer, just as one would
+be if the truncation were performed through separate floating-point to
+integer conversions.
+
 @cindex @code{round@var{m}2} instruction pattern
 @item @samp{round@var{m}2}
 Round operand 1 to the nearest integer, rounding away from zero in the
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 40b1e0d816789b225089c4143fb63e62a6af817a..15d4de24d15cce6793b3bb61d728e61cea00924d 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2282,6 +2282,10 @@ Like @code{aarch64_sve_hw}, but also test for an exact hardware vector length.
 @item aarch64_fjcvtzs_hw
 AArch64 target that is able to generate and execute armv8.3-a FJCVTZS
 instruction.
+
+@item aarch64_frintnzx_ok
+AArch64 target that is able to generate the Armv8.5-a FRINT32Z, FRINT64Z,
+FRINT32X and FRINT64X instructions.
 @end table
 
 @subsubsection MIPS-specific attributes
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 0cba95411a63423484dda5b1251f47de24e926ba..60b404ef44360c8ae0cda1176fb888302ddbc98d 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -130,6 +130,7 @@ init_internal_fns ()
 #define fold_left_direct { 1, 1, false }
 #define mask_fold_left_direct { 1, 1, false }
 #define check_ptrs_direct { 0, 0, false }
+#define ftrunc_int_direct { 0, 1, true }
 
 const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
@@ -156,6 +157,29 @@ get_multi_vector_move (tree array_type, convert_optab optab)
   return convert_optab_handler (optab, imode, vmode);
 }
 
+/* Expand FTRUNC_INT call STMT using optab OPTAB.  */
+
+static void
+expand_ftrunc_int_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[2];
+  tree lhs, float_type, int_type;
+  rtx target, op;
+
+  lhs = gimple_call_lhs (stmt);
+  target = expand_normal (lhs);
+  op = expand_normal (gimple_call_arg (stmt, 0));
+
+  float_type = TREE_TYPE (lhs);
+  int_type = element_type (gimple_call_arg (stmt, 1));
+
+  create_output_operand (&ops[0], target, TYPE_MODE (float_type));
+  create_input_operand (&ops[1], op, TYPE_MODE (float_type));
+
+  expand_insn (convert_optab_handler (optab, TYPE_MODE (float_type),
+                                     TYPE_MODE (int_type)), 2, ops);
+}
+
 /* Expand LOAD_LANES call STMT using optab OPTAB.  */
 
 static void
@@ -3688,6 +3712,15 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
          != CODE_FOR_nothing);
 }
 
+static bool
+direct_ftrunc_int_optab_supported_p (convert_optab optab, tree_pair types,
+                                    optimization_type opt_type)
+{
+  return (convert_optab_handler (optab, TYPE_MODE (types.first),
+                                TYPE_MODE (element_type (types.second)),
+                                opt_type) != CODE_FOR_nothing);
+}
+
 #define direct_unary_optab_supported_p direct_optab_supported_p
 #define direct_binary_optab_supported_p direct_optab_supported_p
 #define direct_ternary_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index bb13c6cce1bf55633760bc14980402f1f0ac1689..e58891e3d3ebc805dd55ac6f70bbda617b7302b7 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -66,6 +66,9 @@ along with GCC; see the file COPYING3.  If not see
 
    - fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
    - check_ptrs: used for check_{raw,war}_ptrs
+   - ftrunc_int: a unary conversion optab that takes and returns values of the
+     same mode, but internally converts via another mode.  This second mode is
+     specified using a dummy final function argument.
 
    DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that
    maps to one of two optabs, depending on the signedness of an input.
@@ -269,6 +272,7 @@ DEF_INTERNAL_FLT_FLOATN_FN (RINT, ECF_CONST, rint, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUND, ECF_CONST, round, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (ROUNDEVEN, ECF_CONST, roundeven, unary)
 DEF_INTERNAL_FLT_FLOATN_FN (TRUNC, ECF_CONST, btrunc, unary)
+DEF_INTERNAL_OPTAB_FN (FTRUNC_INT, ECF_CONST, ftruncint, ftrunc_int)
 
 /* Binary math functions.  */
 DEF_INTERNAL_FLT_FN (ATAN2, ECF_CONST, atan2, binary)
diff --git a/gcc/match.pd b/gcc/match.pd
index a319aefa8081ac177981ad425c461f8a771128f4..80660e6fd40bc6934e1fa0329c0fbcab1658ed44 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3713,12 +3713,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    trapping behaviour, so require !flag_trapping_math. */
 #if GIMPLE
 (simplify
-   (float (fix_trunc @0))
-   (if (!flag_trapping_math
-       && types_match (type, TREE_TYPE (@0))
-       && direct_internal_fn_supported_p (IFN_TRUNC, type,
-                                         OPTIMIZE_FOR_BOTH))
-      (IFN_TRUNC @0)))
+   (float (fix_trunc@1 @0))
+   (if (types_match (type, TREE_TYPE (@0)))
+    (with {
+      tree int_type = element_type (@1);
+     }
+     (if (TYPE_SIGN (TREE_TYPE (@1)) == SIGNED
+         && direct_internal_fn_supported_p (IFN_FTRUNC_INT, type, int_type,
+                                            OPTIMIZE_FOR_BOTH))
+      (IFN_FTRUNC_INT @0 {
+       wide_int_to_tree (int_type, wi::max_value (TYPE_PRECISION (int_type),
+                                                 SIGNED)); })
+      (if (!flag_trapping_math
+          && direct_internal_fn_supported_p (IFN_TRUNC, type,
+                                             OPTIMIZE_FOR_BOTH))
+       (IFN_TRUNC @0))))))
 #endif
 
 /* If we have a narrowing conversion to an integral type that is fed by a
diff --git a/gcc/optabs.def b/gcc/optabs.def
index b889ad2e5a08613db51d16d072080ac6cb48404f..57d259d33409265df3af1646d123e4ab216c34c8 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -63,6 +63,7 @@ OPTAB_CX(fractuns_optab, "fractuns$Q$b$I$a2")
OPTAB_CL(satfract_optab, "satfract$b$Q$a2", SAT_FRACT, "satfract", gen_satfract_conv_libfunc)
OPTAB_CL(satfractuns_optab, "satfractuns$I$b$Q$a2", UNSIGNED_SAT_FRACT, "satfractuns", gen_satfractuns_conv_libfunc)
 
+OPTAB_CD(ftruncint_optab, "ftrunc$a$b2")
 OPTAB_CD(sfixtrunc_optab, "fix_trunc$F$b$I$a2")
 OPTAB_CD(ufixtrunc_optab, "fixuns_trunc$F$b$I$a2")
 
diff --git a/gcc/stor-layout.h b/gcc/stor-layout.h
index 9e892e50c8559e497fcae1b77a36401df82fabe2..165a592d4d2c7bf525060dd51ce6094eb4f4f68a 100644
--- a/gcc/stor-layout.h
+++ b/gcc/stor-layout.h
@@ -36,7 +36,6 @@ extern void place_field (record_layout_info, tree);
 extern void compute_record_mode (tree);
 extern void finish_bitfield_layout (tree);
 extern void finish_record_layout (record_layout_info, int);
-extern unsigned int element_precision (const_tree);
 extern void finalize_size_functions (void);
 extern void fixup_unsigned_type (tree);
 extern void initialize_sizetypes (void);
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz.c b/gcc/testsuite/gcc.target/aarch64/frintnz.c
new file mode 100644
index 0000000000000000000000000000000000000000..008e1cf9f4a1b0148128c65c9ea0d1bb111467b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz.c
@@ -0,0 +1,91 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.5-a" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/*
+** f1:
+**     frint32z        s0, s0
+**     ret
+*/
+float
+f1 (float x)
+{
+  int y = x;
+  return (float) y;
+}
+
+/*
+** f2:
+**     frint64z        s0, s0
+**     ret
+*/
+float
+f2 (float x)
+{
+  long long int y = x;
+  return (float) y;
+}
+
+/*
+** f3:
+**     frint32z        d0, d0
+**     ret
+*/
+double
+f3 (double x)
+{
+  int y = x;
+  return (double) y;
+}
+
+/*
+** f4:
+**     frint64z        d0, d0
+**     ret
+*/
+double
+f4 (double x)
+{
+  long long int y = x;
+  return (double) y;
+}
+
+float
+f1_dont (float x)
+{
+  unsigned int y = x;
+  return (float) y;
+}
+
+float
+f2_dont (float x)
+{
+  unsigned long long int y = x;
+  return (float) y;
+}
+
+double
+f3_dont (double x)
+{
+  unsigned int y = x;
+  return (double) y;
+}
+
+double
+f4_dont (double x)
+{
+  unsigned long long int y = x;
+  return (double) y;
+}
+
+double
+f5_dont (double x)
+{
+  signed short y = x;
+  return (double) y;
+}
+
+/* Make sure the 'dont's don't generate any frintNz.  */
+/* { dg-final { scan-assembler-times {frint32z} 2 } } */
+/* { dg-final { scan-assembler-times {frint64z} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c b/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c
new file mode 100644
index 0000000000000000000000000000000000000000..b93304eb2acb3d3d954eebee51d77ff23fee68ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/frintnz_vec.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.5-a" } */
+/* { dg-require-effective-target aarch64_frintnzx_ok } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#define TEST(name,float_type,int_type)                                 \
+void                                                                   \
+name (float_type * __restrict__ x, float_type * __restrict__ y, int n) \
+{                                                                      \
+  for (int i = 0; i < n; ++i)                                          \
+    {                                                                  \
+      int_type x_i = x[i];                                             \
+      y[i] = (float_type) x_i;                                         \
+    }                                                                  \
+}
+
+/*
+** f1:
+**     ...
+**     frint32z        v0.4s, v0.4s
+**     ...
+*/
+TEST(f1, float, int)
+
+/*
+** f2:
+**     ...
+**     frint64z        v0.4s, v0.4s
+**     ...
+*/
+TEST(f2, float, long long)
+
+/*
+** f3:
+**     ...
+**     frint32z        v0.2d, v0.2d
+**     ...
+*/
+TEST(f3, double, int)
+
+/*
+** f4:
+**     ...
+**     frint64z        v0.2d, v0.2d
+**     ...
+*/
+TEST(f4, double, long long)
diff --git a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
index 07217064e2ba54fcf4f5edc440e6ec19ddae66e1..3d80871c4cebd5fb5cac0714b3feee27038f05fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
+++ b/gcc/testsuite/gcc.target/aarch64/merge_trunc1.c
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -ffast-math" } */
+/* { dg-skip-if "" { aarch64_frintnzx_ok } } */
 
 float
 f1 (float x)
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 8cbda192fe0fae59ea208ee43696b4d22c43e61e..450ca78230faeba40b89fc7987af27b6bf0a0d53 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -11365,6 +11365,32 @@ proc check_effective_target_arm_v8_3a_bkey_directive { } {
        }]
 }
 
+# Return 1 if the target supports Armv8.5-A scalar and Advanced SIMD
+# FRINT32[ZX] and FRINT64[ZX] instructions, 0 otherwise.  The test is valid for
+# AArch64.
+proc check_effective_target_aarch64_frintnzx_ok_nocache { } {
+
+    if { ![istarget aarch64*-*-*] } {
+        return 0;
+    }
+
+    if { [check_no_compiler_messages_nocache \
+             aarch64_frintnzx_ok assembly {
+       #if !defined (__ARM_FEATURE_FRINT)
+       #error "__ARM_FEATURE_FRINT not defined"
+       #endif
+    } [current_compiler_flags]] } {
+       return 1;
+    }
+
+    return 0;
+}
+
+proc check_effective_target_aarch64_frintnzx_ok { } {
+    return [check_cached_effective_target aarch64_frintnzx_ok \
+                check_effective_target_aarch64_frintnzx_ok_nocache] 
+}
+
 # Return 1 if the target supports executing the Armv8.1-M Mainline Low
 # Overhead Loop, 0 otherwise.  The test is valid for ARM.
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 03cc7267cf80d4ce73c0d89ab86b07e84752456a..35bb1f70f7b173ad0d1e9f70ce0ac9da891dbe62 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1625,7 +1625,8 @@ vect_finish_stmt_generation (vec_info *vinfo,
 
 static internal_fn
 vectorizable_internal_function (combined_fn cfn, tree fndecl,
-                               tree vectype_out, tree vectype_in)
+                               tree vectype_out, tree vectype_in,
+                               tree *vectypes)
 {
   internal_fn ifn;
   if (internal_fn_p (cfn))
@@ -1637,8 +1638,12 @@ vectorizable_internal_function (combined_fn cfn, tree fndecl,
       const direct_internal_fn_info &info = direct_internal_fn (ifn);
       if (info.vectorizable)
        {
-         tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
-         tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
+         tree type0 = (info.type0 < 0 ? vectype_out : vectypes[info.type0]);
+         if (!type0)
+           type0 = vectype_in;
+         tree type1 = (info.type1 < 0 ? vectype_out : vectypes[info.type1]);
+         if (!type1)
+           type1 = vectype_in;
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
@@ -3252,16 +3257,31 @@ vectorizable_call (vec_info *vinfo,
       rhs_type = unsigned_type_node;
     }
 
-  int mask_opno = -1;
+  /* The argument that is not of the same type as the others.  */
+  int diff_opno = -1;
+  bool masked = false;
   if (internal_fn_p (cfn))
-    mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
+    {
+      if (cfn == CFN_FTRUNC_INT)
+       /* For FTRUNC this represents the argument that carries the type of the
+          intermediate signed integer.  */
+       diff_opno = 1;
+      else
+       {
+         /* For masked operations this represents the argument that carries the
+            mask.  */
+         diff_opno = internal_fn_mask_index (as_internal_fn (cfn));
+         masked = diff_opno >= 0;
+       }
+    }
 
   for (i = 0; i < nargs; i++)
     {
-      if ((int) i == mask_opno)
+      if ((int) i == diff_opno && masked)
        {
-         if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
-                                      &op, &slp_op[i], &dt[i], &vectypes[i]))
+         if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node,
+                                      diff_opno, &op, &slp_op[i], &dt[i],
+                                      &vectypes[i]))
            return false;
          continue;
        }
@@ -3275,27 +3295,35 @@ vectorizable_call (vec_info *vinfo,
          return false;
        }
 
-      /* We can only handle calls with arguments of the same type.  */
-      if (rhs_type
-         && !types_compatible_p (rhs_type, TREE_TYPE (op)))
+      if ((int) i != diff_opno)
        {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                             "argument types differ.\n");
-         return false;
-       }
-      if (!rhs_type)
-       rhs_type = TREE_TYPE (op);
+         /* We can only handle calls with arguments of the same type.  */
+         if (rhs_type
+             && !types_compatible_p (rhs_type, TREE_TYPE (op)))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "argument types differ.\n");
+             return false;
+           }
+         if (!rhs_type)
+           rhs_type = TREE_TYPE (op);
 
-      if (!vectype_in)
-       vectype_in = vectypes[i];
-      else if (vectypes[i]
-              && !types_compatible_p (vectypes[i], vectype_in))
+         if (!vectype_in)
+           vectype_in = vectypes[i];
+         else if (vectypes[i]
+                  && !types_compatible_p (vectypes[i], vectype_in))
+           {
+             if (dump_enabled_p ())
+               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                                "argument vector types differ.\n");
+             return false;
+           }
+       }
+      else
        {
-         if (dump_enabled_p ())
-           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                             "argument vector types differ.\n");
-         return false;
+         vectypes[i] = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op),
+                                                    slp_node);
        }
     }
   /* If all arguments are external or constant defs, infer the vector type
@@ -3371,8 +3399,8 @@ vectorizable_call (vec_info *vinfo,
          || (modifier == NARROW
              && simple_integer_narrowing (vectype_out, vectype_in,
                                           &convert_code))))
-    ifn = vectorizable_internal_function (cfn, callee, vectype_out,
-                                         vectype_in);
+    ifn = vectorizable_internal_function (cfn, callee, vectype_out, vectype_in,
+                                         &vectypes[0]);
 
   /* If that fails, try asking for a target-specific built-in function.  */
   if (ifn == IFN_LAST)
@@ -3446,12 +3474,12 @@ vectorizable_call (vec_info *vinfo,
        record_stmt_cost (cost_vec, ncopies / 2,
                          vec_promote_demote, stmt_info, 0, vect_body);
 
-      if (loop_vinfo && mask_opno >= 0)
+      if (loop_vinfo && masked)
        {
          unsigned int nvectors = (slp_node
                                   ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
                                   : ncopies);
-         tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
+         tree scalar_mask = gimple_call_arg (stmt_info->stmt, diff_opno);
          vect_record_loop_mask (loop_vinfo, masks, nvectors,
                                 vectype_out, scalar_mask);
        }
@@ -3499,7 +3527,7 @@ vectorizable_call (vec_info *vinfo,
                    {
                      /* We don't define any narrowing conditional functions
                         at present.  */
-                     gcc_assert (mask_opno < 0);
+                     gcc_assert (!masked);
                      tree half_res = make_ssa_name (vectype_in);
                      gcall *call
                        = gimple_build_call_internal_vec (ifn, vargs);
@@ -3519,15 +3547,15 @@ vectorizable_call (vec_info *vinfo,
                    }
                  else
                    {
-                     if (mask_opno >= 0 && masked_loop_p)
+                     if (masked && masked_loop_p)
                        {
                          unsigned int vec_num = vec_oprnds0.length ();
                          /* Always true for SLP.  */
                          gcc_assert (ncopies == 1);
                          tree mask = vect_get_loop_mask (gsi, masks, vec_num,
                                                          vectype_out, i);
-                         vargs[mask_opno] = prepare_load_store_mask
-                           (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
+                         vargs[diff_opno] = prepare_load_store_mask
+                           (TREE_TYPE (mask), mask, vargs[diff_opno], gsi);
                        }
 
                      gcall *call;
@@ -3559,13 +3587,13 @@ vectorizable_call (vec_info *vinfo,
              orig_vargs[i] = vargs[i] = vec_defs[i][j];
            }
 
-         if (mask_opno >= 0 && masked_loop_p)
+         if (masked && masked_loop_p)
            {
              tree mask = vect_get_loop_mask (gsi, masks, ncopies,
                                              vectype_out, j);
-             vargs[mask_opno]
+             vargs[diff_opno]
                = prepare_load_store_mask (TREE_TYPE (mask), mask,
-                                          vargs[mask_opno], gsi);
+                                          vargs[diff_opno], gsi);
            }
 
          gimple *new_stmt;
@@ -3584,7 +3612,7 @@ vectorizable_call (vec_info *vinfo,
            {
              /* We don't define any narrowing conditional functions at
                 present.  */
-             gcc_assert (mask_opno < 0);
+             gcc_assert (!masked);
              tree half_res = make_ssa_name (vectype_in);
              gcall *call = gimple_build_call_internal_vec (ifn, vargs);
              gimple_call_set_lhs (call, half_res);
@@ -3628,7 +3656,7 @@ vectorizable_call (vec_info *vinfo,
     {
       auto_vec<vec<tree> > vec_defs (nargs);
       /* We don't define any narrowing conditional functions at present.  */
-      gcc_assert (mask_opno < 0);
+      gcc_assert (!masked);
       for (j = 0; j < ncopies; ++j)
        {
          /* Build argument list for the vectorized call.  */
diff --git a/gcc/tree.h b/gcc/tree.h
index f62c00bc8707029db52e2f3fe529948755235d3d..31ce45a84cc267ea2022c8ca6323368fbe15eb8b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -6547,4 +6547,12 @@ extern unsigned fndecl_dealloc_argno (tree);
    object or pointer.  Otherwise return null.  */
 extern tree get_attr_nonstring_decl (tree, tree * = NULL);
 
+/* Return the type, or for a complex or vector type the type of its
+   elements.  */
+extern tree element_type (const_tree);
+
+/* Return the precision of the type, or for a complex or vector type the
+   precision of the type of its elements.  */
+extern unsigned int element_precision (const_tree);
+
 #endif  /* GCC_TREE_H  */
diff --git a/gcc/tree.c b/gcc/tree.c
index 845228a055b2cfac0c9ca8c0cda1b9df4b0095c6..f1e9a1eb48769cb11aa69730e2480ed5522f78c1 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -6645,11 +6645,11 @@ valid_constant_size_p (const_tree size, cst_size_error *perr /* = NULL */)
   return true;
 }
 
-/* Return the precision of the type, or for a complex or vector type the
-   precision of the type of its elements.  */
+/* Return the type, or for a complex or vector type the type of its
+   elements.  */
 
-unsigned int
-element_precision (const_tree type)
+tree
+element_type (const_tree type)
 {
   if (!TYPE_P (type))
     type = TREE_TYPE (type);
@@ -6657,7 +6657,16 @@ element_precision (const_tree type)
   if (code == COMPLEX_TYPE || code == VECTOR_TYPE)
     type = TREE_TYPE (type);
 
-  return TYPE_PRECISION (type);
+  return CONST_CAST_TREE (type);
+}
+
+/* Return the precision of the type, or for a complex or vector type the
+   precision of the type of its elements.  */
+
+unsigned int
+element_precision (const_tree type)
+{
+  return TYPE_PRECISION (element_type (type));
 }
 
 /* Return true if CODE represents an associative tree code.  Otherwise
