Hi,

    I have attached a patch to fix
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=59390. Please review.

Here is the problem. GCC adds target-specific builtins on demand. The
FMA target-specific builtin __builtin_ia32_vfmaddpd gets added via
this declaration:

void fun() __attribute__((target("fma")));

Specifically, the builtin __builtin_ia32_vfmaddpd gets added when
ix86_add_new_builtins is called from ix86_valid_target_attribute_tree
when processing this target attribute.

Now, when the vectorizer is processing the builtin "__builtin_fma" in
function other_fn(), it checks to see if this function is vectorizable
and calls ix86_builtin_vectorized_function in i386.c. That returns the
builtin stored here:


case BUILT_IN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
 if (out_n == 2 && in_n == 2)
   return ix86_builtins[IX86_BUILTIN_VFMADDPD];
          ....

ix86_builtins[IX86_BUILTIN_VFMADDPD] would have contained NULL_TREE
had the builtin not been added by the previous target attribute. That
is why the code works if we remove the previous declaration.

The fix is to not just return the builtin but to also check if the
current function's isa allows the use of the builtin. For instance,
this patch would solve the problem:

@@ -33977,7 +33977,13 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
  {
   if (out_n == 2 && in_n == 2)
-    return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+    {
+      if (ix86_builtins_isa[IX86_BUILTIN_VFMADDPD].isa
+  & global_options.x_ix86_isa_flags)
+        return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+      else
+ return NULL_TREE;
+    }


but there are many instances of this usage in
ix86_builtin_vectorized_function. This patch covers all the cases.


Thanks,
Sri
        PR target/59390
        * gcc.target/i386/pr59390.c: New test.
        * config/i386/i386.c (get_builtin): New function.
        (ix86_builtin_vectorized_function): Replace all instances of
        ix86_builtins[...] with get_builtin(...).
        (ix86_builtin_reciprocal): Ditto.


Index: testsuite/gcc.target/i386/pr59390.c
===================================================================
--- testsuite/gcc.target/i386/pr59390.c (revision 0)
+++ testsuite/gcc.target/i386/pr59390.c (revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c99 -O3" } */
+
+#include "math.h"
+void fun() __attribute__((target("fma")));
+
+void 
+other_fun(double *restrict out, double * restrict a, double * restrict b, 
double * restrict c, int n)
+{
+    int i;
+    for (i = 0; i < n; i++) {
+        out[i] = fma(a[i], b[i], c[i]);
+    }   
+}
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 205616)
+++ config/i386/i386.c  (working copy)
@@ -33649,6 +33649,30 @@ addcarryx:
   gcc_unreachable ();
 }
 
+/* This returns the target-specific builtin with code CODE if
+   current_function_decl has visibility on this builtin, which is checked
+   using isa flags.  Returns NULL_TREE otherwise.  */
+
+static tree get_builtin (enum ix86_builtins code)
+{
+  struct cl_target_option *opts;
+  tree target_tree = NULL_TREE;
+
+  /* Determine the isa flags of current_function_decl.  */
+
+  if (current_function_decl)
+    target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
+  if (target_tree)
+    opts = TREE_TARGET_OPTION (target_tree);
+  else
+    opts = TREE_TARGET_OPTION (target_option_default_node);
+
+  if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
+    return ix86_builtin_decl (code, true);
+  else
+    return NULL_TREE;
+}
+
 /* Returns a function decl for a vectorized version of the builtin function
    with builtin function code FN and the result vector type TYPE, or NULL_TREE
    if it is not available.  */
@@ -33677,9 +33701,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_SQRTPD];
+           get_builtin (IX86_BUILTIN_SQRTPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_SQRTPD256];
+           get_builtin (IX86_BUILTIN_SQRTPD256);
        }
       break;
 
@@ -33687,9 +33711,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
+           get_builtin (IX86_BUILTIN_SQRTPS_NR);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_SQRTPS_NR256];
+           get_builtin (IX86_BUILTIN_SQRTPS_NR256);
        }
       break;
 
@@ -33703,9 +33727,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33719,9 +33743,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX];
+           get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS_SFIX256];
+           get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
        }
       break;
 
@@ -33735,9 +33759,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33751,9 +33775,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX];
+           get_builtin (IX86_BUILTIN_CEILPS_SFIX);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CEILPS_SFIX256];
+           get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
        }
       break;
 
@@ -33763,9 +33787,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33775,9 +33799,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
+           get_builtin (IX86_BUILTIN_CVTPS2DQ);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CVTPS2DQ256];
+           get_builtin (IX86_BUILTIN_CVTPS2DQ256);
        }
       break;
 
@@ -33791,9 +33815,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == DFmode)
        {
          if (out_n == 4 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
          else if (out_n == 8 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
        }
       break;
 
@@ -33807,9 +33831,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SImode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ_SFIX256];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
        }
       break;
 
@@ -33817,9 +33841,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
+           get_builtin (IX86_BUILTIN_CPYSGNPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPD256];
+           get_builtin (IX86_BUILTIN_CPYSGNPD256);
        }
       break;
 
@@ -33827,9 +33851,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
+           get_builtin (IX86_BUILTIN_CPYSGNPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CPYSGNPS256];
+           get_builtin (IX86_BUILTIN_CPYSGNPS256);
        }
       break;
 
@@ -33841,9 +33865,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD];
+           get_builtin (IX86_BUILTIN_FLOORPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPD256];
+           get_builtin (IX86_BUILTIN_FLOORPD256);
        }
       break;
 
@@ -33855,9 +33879,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS];
+           get_builtin (IX86_BUILTIN_FLOORPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_FLOORPS256];
+           get_builtin (IX86_BUILTIN_FLOORPS256);
        }
       break;
 
@@ -33869,9 +33893,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_CEILPD];
+           get_builtin (IX86_BUILTIN_CEILPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPD256];
+           get_builtin (IX86_BUILTIN_CEILPD256);
        }
       break;
 
@@ -33883,9 +33907,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_CEILPS];
+           get_builtin (IX86_BUILTIN_CEILPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_CEILPS256];
+           get_builtin (IX86_BUILTIN_CEILPS256);
        }
       break;
 
@@ -33897,9 +33921,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPD];
+           get_builtin (IX86_BUILTIN_TRUNCPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPD256];
+           get_builtin (IX86_BUILTIN_TRUNCPD256);
        }
       break;
 
@@ -33911,9 +33935,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPS];
+           get_builtin (IX86_BUILTIN_TRUNCPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_TRUNCPS256];
+           get_builtin (IX86_BUILTIN_TRUNCPS256);
        }
       break;
 
@@ -33925,9 +33949,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_RINTPD];
+           get_builtin (IX86_BUILTIN_RINTPD);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_RINTPD256];
+           get_builtin (IX86_BUILTIN_RINTPD256);
        }
       break;
 
@@ -33939,9 +33963,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_RINTPS];
+           get_builtin (IX86_BUILTIN_RINTPS);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_RINTPS256];
+           get_builtin (IX86_BUILTIN_RINTPS256);
        }
       break;
 
@@ -33953,9 +33977,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
          else if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPD_AZ256];
+           get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
        }
       break;
 
@@ -33967,9 +33991,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
          else if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_ROUNDPS_AZ256];
+           get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
        }
       break;
 
@@ -33977,9 +34001,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == DFmode && in_mode == DFmode)
        {
          if (out_n == 2 && in_n == 2)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPD];
+           return get_builtin (IX86_BUILTIN_VFMADDPD);
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
+           get_builtin (IX86_BUILTIN_VFMADDPD256);
        }
       break;
 
@@ -33987,9 +34011,9 @@ ix86_builtin_vectorized_function (tree fndecl, tre
       if (out_mode == SFmode && in_mode == SFmode)
        {
          if (out_n == 4 && in_n == 4)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPS];
+           get_builtin (IX86_BUILTIN_VFMADDPS);
          if (out_n == 8 && in_n == 8)
-           return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
+           get_builtin (IX86_BUILTIN_VFMADDPS256);
        }
       break;
 
@@ -34269,7 +34293,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vect
       return NULL_TREE;
     }
 
-  return ix86_builtins[code];
+  return get_builtin (code);
 }
 
 /* Returns a code for a target-specific builtin that implements
@@ -34290,10 +34314,10 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_
       {
        /* Vectorized version of sqrt to rsqrt conversion.  */
       case IX86_BUILTIN_SQRTPS_NR:
-       return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
+       return get_builtin (IX86_BUILTIN_RSQRTPS_NR);
 
       case IX86_BUILTIN_SQRTPS_NR256:
-       return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR256];
+       return get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
 
       default:
        return NULL_TREE;
@@ -34304,7 +34328,7 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_
       {
        /* Sqrt to rsqrt conversion.  */
       case BUILT_IN_SQRTF:
-       return ix86_builtins[IX86_BUILTIN_RSQRTF];
+       return get_builtin (IX86_BUILTIN_RSQRTF);
 
       default:
        return NULL_TREE;

Reply via email to