Patch updated with two more tests to check if the vfmadd insn is being
produced when possible.

Thanks
Sri

On Fri, Dec 6, 2013 at 11:12 AM, Sriraman Tallam <tmsri...@google.com> wrote:
> Hi,
>
>     I have attached a patch to fix
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59390. Please review.
>
> Here is the problem. GCC adds target-specific builtins on demand. The
> FMA target-specific builtin __builtin_ia32_vfmaddpd gets added via
> this declaration:
>
> void fun() __attribute__((target("fma")));
>
> Specifically, the builtin __builtin_ia32_vfmaddpd gets added when
> ix86_add_new_builtins is called from ix86_valid_target_attribute_tree
> when processing this target attribute.
>
> Now, when the vectorizer is processing the builtin "__builtin_fma" in
> function other_fn(), it checks to see if this function is vectorizable
> and calls ix86_builtin_vectorized_function in i386.c. That returns the
> builtin stored here:
>
>
> case BUILT_IN_FMA:
> if (out_mode == DFmode && in_mode == DFmode)
> {
>  if (out_n == 2 && in_n == 2)
>    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
>           ....
>
> ix86_builtins[IX86_BUILTIN_VFMADDPD] would have contained NULL_TREE
> had the builtin not been added by the previous target attribute. That
> is why the code works if we remove the previous declaration.
>
> The fix is to not just return the builtin but to also check if the
> current function's isa allows the use of the builtin. For instance,
> this patch would solve the problem:
>
> @@ -33977,7 +33977,13 @@ ix86_builtin_vectorized_function (tree fndecl, tre
>        if (out_mode == DFmode && in_mode == DFmode)
>   {
>    if (out_n == 2 && in_n == 2)
> -    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
> +    {
> +      if (ix86_builtins_isa[IX86_BUILTIN_VFMADDPD].isa
> +  & global_options.x_ix86_isa_flags)
> +        return ix86_builtins[IX86_BUILTIN_VFMADDPD];
> +      else
> + return NULL_TREE;
> +    }
>
>
> but there are many instances of this usage in
> ix86_builtin_vectorized_function. This patch covers all the cases.
>
>
> Thanks,
> Sri
        PR target/59390
        * gcc.target/i386/pr59390.c: New test.
        * gcc.target/i386/pr59390_1.c: New test.
        * gcc.target/i386/pr59390_2.c: New test.
        * config/i386/i386.c (get_builtin): New function.
        (ix86_builtin_vectorized_function): Replace all instances of
        ix86_builtins[...] with get_builtin(...).
        (ix86_builtin_reciprocal): Ditto.


Index: testsuite/gcc.target/i386/pr59390.c
===================================================================
--- testsuite/gcc.target/i386/pr59390.c (revision 0)
+++ testsuite/gcc.target/i386/pr59390.c (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        out[i] = fma(a[i], b[i], c[i]);
+    }   
+}
+
+/* { dg-final { scan-assembler-not "vfmadd" } } */
Index: testsuite/gcc.target/i386/pr59390_1.c
===================================================================
--- testsuite/gcc.target/i386/pr59390_1.c       (revision 0)
+++ testsuite/gcc.target/i386/pr59390_1.c       (revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+__attribute__((target("fma")))
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        out[i] = fma(a[i], b[i], c[i]);
+    }   
+}
+
+/* { dg-final { scan-assembler "vfmadd" } } */
Index: testsuite/gcc.target/i386/pr59390_2.c
===================================================================
--- testsuite/gcc.target/i386/pr59390_2.c       (revision 0)
+++ testsuite/gcc.target/i386/pr59390_2.c       (revision 0)
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3 -mfma" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        out[i] = fma(a[i], b[i], c[i]);
+    }   
+}
+
+/* { dg-final { scan-assembler "vfmadd" } } */
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 205616)
+++ config/i386/i386.c  (working copy)
@@ -33649,6 +33649,30 @@ addcarryx:
   gcc_unreachable ();
 }
 
+/* This returns the target-specific builtin with code CODE if
+   current_function_decl has visibility on this builtin, which is checked
+   using isa flags.  Returns NULL_TREE otherwise.  */
+
+static tree get_builtin (enum ix86_builtins code)
+{
+  struct cl_target_option *opts;
+  tree target_tree = NULL_TREE;
+
+  /* Determine the isa flags of current_function_decl.  */
+
+  if (current_function_decl)
+    target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
+  if (target_tree)
+    opts = TREE_TARGET_OPTION (target_tree);
+  else
+    opts = TREE_TARGET_OPTION (target_option_default_node);
+
+  if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
+    return ix86_builtin_decl (code, true);
+  else
+    return NULL_TREE;
+}
+
 /* Returns a function decl for a vectorized version of the builtin function
    with builtin function code FN and the result vector type TYPE, or NULL_TREE
    if it is not available.  */
@@ -33677,9 +33701,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_SQRTPD];
+           get_builtin (IX86_BUILTIN_SQRTPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+           get_builtin (IX86_BUILTIN_SQRTPD256);
        }
       break;
 
@@ -33687,9 +33711,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+           get_builtin (IX86_BUILTIN_SQRTPS_NR);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+           get_builtin (IX86_BUILTIN_SQRTPS_NR256);
        }
       break;
 
@@ -33703,9 +33727,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33719,9 +33743,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
+           get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
+           get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
        }
       break;
 
@@ -33735,9 +33759,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33751,9 +33775,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
+           get_builtin (IX86_BUILTIN_CEILPS_SFIX);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
+           get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
        }
       break;
 
@@ -33763,9 +33787,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33775,9 +33799,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+           get_builtin (IX86_BUILTIN_CVTPS2DQ);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
+           get_builtin (IX86_BUILTIN_CVTPS2DQ256);
        }
       break;
 
@@ -33791,9 +33815,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33807,9 +33831,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
        }
       break;
 
@@ -33817,9 +33841,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
+           get_builtin (IX86_BUILTIN_CPYSGNPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
+           get_builtin (IX86_BUILTIN_CPYSGNPD256);
        }
       break;
 
@@ -33827,9 +33851,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
+           get_builtin (IX86_BUILTIN_CPYSGNPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
+           get_builtin (IX86_BUILTIN_CPYSGNPS256);
        }
       break;
 
@@ -33841,9 +33865,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD];
+           get_builtin (IX86_BUILTIN_FLOORPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD256];
+           get_builtin (IX86_BUILTIN_FLOORPD256);
        }
       break;
 
@@ -33855,9 +33879,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS];
+           get_builtin (IX86_BUILTIN_FLOORPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS256];
+           get_builtin (IX86_BUILTIN_FLOORPS256);
        }
       break;
 
@@ -33869,9 +33893,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_CEILPD];
+           get_builtin (IX86_BUILTIN_CEILPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPD256];
+           get_builtin (IX86_BUILTIN_CEILPD256);
        }
       break;
 
@@ -33883,9 +33907,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPS];
+           get_builtin (IX86_BUILTIN_CEILPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CEILPS256];
+           get_builtin (IX86_BUILTIN_CEILPS256);
        }
       break;
 
@@ -33897,9 +33921,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPD];
+           get_builtin (IX86_BUILTIN_TRUNCPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
+           get_builtin (IX86_BUILTIN_TRUNCPD256);
        }
       break;
 
@@ -33911,9 +33935,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPS];
+           get_builtin (IX86_BUILTIN_TRUNCPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
+           get_builtin (IX86_BUILTIN_TRUNCPS256);
        }
       break;
 
@@ -33925,9 +33949,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_RINTPD];
+           get_builtin (IX86_BUILTIN_RINTPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_RINTPD256];
+           get_builtin (IX86_BUILTIN_RINTPD256);
        }
       break;
 
@@ -33939,9 +33963,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_RINTPS];
+           get_builtin (IX86_BUILTIN_RINTPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_RINTPS256];
+           get_builtin (IX86_BUILTIN_RINTPS256);
        }
       break;
 
@@ -33953,9 +33977,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
        }
       break;
 
@@ -33967,9 +33991,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
        }
       break;
 
@@ -33977,9 +34001,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+           return get_builtin (IX86_BUILTIN_VFMADDPD);
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
+           get_builtin (IX86_BUILTIN_VFMADDPD256);
        }
       break;
 
@@ -33987,9 +34011,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPS];
+           get_builtin (IX86_BUILTIN_VFMADDPS);
          if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
+           get_builtin (IX86_BUILTIN_VFMADDPS256);
        }
       break;
 
@@ -34269,7 +34293,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vect
       return NULL_TREE;
     }
 
-  return ix86_builtins[code];
+  return get_builtin (code);
 }
 
 /* Returns a code for a target-specific builtin that implements
@@ -34290,10 +34314,10 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_
       {
        /* Vectorized version of sqrt to rsqrt conversion.  */
       case IX86_BUILTIN_SQRTPS_NR:
-       return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
+       return get_builtin (IX86_BUILTIN_RSQRTPS_NR);
 
       case IX86_BUILTIN_SQRTPS_NR256:
-       return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
+       return get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
 
       default:
        return NULL_TREE;
@@ -34304,7 +34328,7 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_
       {
        /* Sqrt to rsqrt conversion.  */
       case BUILT_IN_SQRTF:
-       return ix86_builtins[IX86_BUILTIN_RSQRTF];
+       return get_builtin (IX86_BUILTIN_RSQRTF);
 
       default:
        return NULL_TREE;

Reply via email to