[PATCH 20/57] rs6000: Write output to the builtins init file, part 2 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-03  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (write_init_bif_table):
Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 71 +
 1 file changed, 71 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 28810f56ec2..82c0567756b 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2272,6 +2272,77 @@ write_header_file ()
 static void
 write_init_bif_table ()
 {
+  /* Emit one initialization sequence into init_file for every entry of
+     the bifs table, up to and including index curr_bif.  */
+  for (int n = 0; n <= curr_bif; n++)
+    {
+      const bifdata *bif = &bifs[n];
+      const char *id = bif->idname;
+      const char *fntype = bif->fndecl;
+
+      /* A "tf" token in the function type string stands for
+         float128_type_node, which may be undefined (for example with
+         -maltivec -mno-vsx).  Such entries get an extra run-time guard
+         in the generated code.  */
+      const int needs_f128_guard = strstr (fntype, "tf") != NULL;
+
+      fprintf (init_file,
+               "  rs6000_builtin_info_x[RS6000_BIF_%s].fntype"
+               "\n= %s;\n",
+               id, fntype);
+
+      fprintf (init_file,
+               "  if (new_builtins_are_live)\n");
+      fprintf (init_file, "{\n");
+
+      if (needs_f128_guard)
+        {
+          fprintf (init_file, "  if (float128_type_node)\n");
+          fprintf (init_file, "{\n");
+        }
+
+      /* The call that creates the decl and stores it in the decl table.  */
+      fprintf (init_file,
+               "  rs6000_builtin_decls_x[(int)RS6000_BIF_%s] = t\n",
+               id);
+      fprintf (init_file,
+               "= add_builtin_function (\"%s\",\n",
+               bif->proto.bifname);
+      fprintf (init_file,
+               "%s,\n",
+               fntype);
+      fprintf (init_file,
+               "(int)RS6000_BIF_%s,"
+               " BUILT_IN_MD,\n",
+               id);
+      fprintf (init_file,
+               "NULL, NULL_TREE);\n");
+
+      /* Flags on the new decl that depend on the function kind.  */
+      switch (bif->kind)
+        {
+        case FNK_CONST:
+          fprintf (init_file, "  TREE_READONLY (t) = 1;\n");
+          fprintf (init_file, "  TREE_NOTHROW (t) = 1;\n");
+          break;
+
+        case FNK_PURE:
+          fprintf (init_file, "  DECL_PURE_P (t) = 1;\n");
+          fprintf (init_file, "  TREE_NOTHROW (t) = 1;\n");
+          break;
+
+        case FNK_FPMATH:
+          fprintf (init_file, "  TREE_NOTHROW (t) = 1;\n");
+          fprintf (init_file, "  if (flag_rounding_math)\n");
+          fprintf (init_file, "{\n");
+          fprintf (init_file, "  DECL_PURE_P (t) = 1;\n");
+          fprintf (init_file, "  DECL_IS_NOVOPS (t) = 1;\n");
+          fprintf (init_file, "}\n");
+          fprintf (init_file, "  else\n");
+          fprintf (init_file, "TREE_READONLY (t) = 1;\n");
+          break;
+
+        default:
+          break;
+        }
+
+      /* Close the float128 guard; without the type the decl slot is
+         explicitly cleared.  */
+      if (needs_f128_guard)
+        {
+          fprintf (init_file, "}\n");
+          fprintf (init_file, "  else\n");
+          fprintf (init_file, "{\n");
+          fprintf (init_file, "  rs6000_builtin_decls_x"
+                   "[(int)RS6000_BIF_%s] = NULL_TREE;\n", id);
+          fprintf (init_file, "}\n");
+        }
+      fprintf (init_file, "}\n\n");
+    }
 }
 
 /* Write code to initialize the overload table.  */
-- 
2.27.0



[PATCH 13/57] rs6000: Parsing built-in input file, part 2 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-24  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (parse_args): New function.
(parse_prototype): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 143 
 1 file changed, 143 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 23d80b9ee43..0f0f8eaecf2 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -1094,6 +1094,91 @@ match_type (typeinfo *typedata, int voidok)
   return 1;
 }
 
+/* Parse the argument list of the prototype on the current line, from
+   the opening '(' through the closing ')'.  Each argument type is
+   appended to PROTOPTR->args and counted in PROTOPTR->nargs.  For an
+   argument whose type carries a restriction, the 1-based operand number,
+   the restriction and its two values are recorded in the next free slot
+   of PROTOPTR's restr_opnd, restr, restr_val1 and restr_val2 arrays; at
+   most MAXRESTROPNDS such arguments are allowed.  Return PC_OK on
+   success, or PC_PARSEFAIL after issuing a diagnostic.  */
+static parse_codes
+parse_args (prototype *protoptr)
+{
+  typelist **argptr = &protoptr->args;
+  int *nargs = &protoptr->nargs;
+  int *restr_opnd = protoptr->restr_opnd;
+  restriction *restr = protoptr->restr;
+  int *val1 = protoptr->restr_val1;
+  int *val2 = protoptr->restr_val2;
+  int restr_cnt = 0;
+
+  int success;
+  *nargs = 0;
+
+  /* Start the argument list.  */
+  consume_whitespace ();
+  if (linebuf[pos] != '(')
+    {
+      (*diag) ("missing '(' at column %d.\n", pos + 1);
+      return PC_PARSEFAIL;
+    }
+  safe_inc_pos ();
+
+  do {
+    consume_whitespace ();
+    int oldpos = pos;
+
+    /* Zero-initialized list node for the next argument.  */
+    typelist *argentry = (typelist *) calloc (1, sizeof (typelist));
+    if (!argentry)
+      {
+        (*diag) ("out of memory.\n");
+        exit (EC_INTERR);
+      }
+    typeinfo *argtype = &argentry->info;
+    success = match_type (argtype, VOID_NOTOK);
+    if (success)
+      {
+        if (argtype->restr)
+          {
+            if (restr_cnt >= MAXRESTROPNDS)
+              {
+                (*diag) ("more than %d restricted operands at column %d.\n",
+                         MAXRESTROPNDS, oldpos + 1);
+                /* The node is not linked into the list yet.  */
+                free (argentry);
+                return PC_PARSEFAIL;
+              }
+            restr_opnd[restr_cnt] = *nargs + 1;
+            restr[restr_cnt] = argtype->restr;
+            val1[restr_cnt] = argtype->val1;
+            val2[restr_cnt++] = argtype->val2;
+          }
+        (*nargs)++;
+        *argptr = argentry;
+        argptr = &argentry->next;
+        consume_whitespace ();
+        if (linebuf[pos] == ',')
+          safe_inc_pos ();
+        else if (linebuf[pos] != ')')
+          {
+            (*diag) ("arg not followed by ',' or ')' at column %d.\n",
+                     pos + 1);
+            return PC_PARSEFAIL;
+          }
+
+#ifdef DEBUG
+   (*diag) ("argument type: isvoid = %d, isconst = %d, isvector = %d, \
+issigned = %d, isunsigned = %d, isbool = %d, ispixel = %d, ispointer = %d, \
+base = %d, restr = %d, val1 = %d, val2 = %d, pos = %d.\n",
+            argtype->isvoid, argtype->isconst, argtype->isvector,
+            argtype->issigned, argtype->isunsigned, argtype->isbool,
+            argtype->ispixel, argtype->ispointer, argtype->base,
+            argtype->restr, argtype->val1, argtype->val2, pos + 1);
+#endif
+      }
+    else
+      {
+        /* No further type: terminate the list and expect the ')'.  */
+        free (argentry);
+        *argptr = NULL;
+        pos = oldpos;
+        if (linebuf[pos] != ')')
+          {
+            (*diag) ("badly terminated arg list at column %d.\n", pos + 1);
+            return PC_PARSEFAIL;
+          }
+        safe_inc_pos ();
+      }
+  } while (success);
+
+  return PC_OK;
+}
+
 /* Parse the attribute list.  */
 static parse_codes
 parse_bif_attrs (attrinfo *attrptr)
@@ -1106,6 +1191,64 @@ parse_bif_attrs (attrinfo *attrptr)
 static parse_codes
 parse_prototype (prototype *protoptr)
 {
+  typeinfo *ret_type = >rettype;
+  char **bifname = >bifname;
+
+  /* Get the return type.  */
+  consume_whitespace ();
+  int oldpos = pos;
+  int success = match_type (ret_type, VOID_OK);
+  if (!success)
+{
+  (*diag) ("missing or badly formed return type at column %d.\n",
+  oldpos + 1);
+  return PC_PARSEFAIL;
+}
+
+#ifdef DEBUG
+  (*diag) ("return type: isvoid = %d, isconst = %d, isvector = %d, \
+issigned = %d, isunsigned = %d, isbool = %d, ispixel = %d, ispointer = %d, \
+base = %d, restr[0] = %d, val1[0] = %d, val2[0] = %d, restr1[1] = %d, \
+val1[1] = %d, val2[1] = %d, pos = %d.\n",
+  ret_type->isvoid, ret_type->isconst, ret_type->isvector,
+  ret_type->issigned, ret_type->isunsigned, ret_type->isbool,
+  ret_type->ispixel, ret_type->ispointer, ret_type->base,
+  ret_type->restr, ret_type->val1, ret_type->val2, pos + 1);
+#endif
+
+  /* Get the bif name.  */
+  consume_whitespace ();
+  oldpos = pos;
+  *bifname = match_identifier ();
+  if (!*bifname)
+{
+  (*diag) ("missing function name at column %d.\n", oldpos + 1);
+  return PC_PARSEFAIL;
+}
+
+#ifdef DEBUG
+  (*diag) ("function name is '%s'.\n", *bifname);
+#endif
+
+  /* Process arguments.  */
+  if (parse_args (protoptr) == PC_PARSEFAIL)
+return PC_PARSEFAIL;
+
+  /* Process terminating semicolon.  */
+  consume_whitespace ();
+  if (linebuf[pos] != ';')
+{
+  (*diag) ("missing semicolon at column %d.\n", pos + 1);
+  return PC_PARSEFAIL;
+}
+  safe_inc_pos ();
+  consume_whitespace ();
+  if (linebuf[pos] != '\n')
+{
+  (*diag) ("garbage at end of line at column %d.\n", pos + 1);
+  return PC_PARSEFAIL;
+ 

[PATCH 18/57] rs6000: Write output to the builtins header file

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-24  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c
(write_autogenerated_header): New function.
(write_decls): Likewise.
(write_extern_fntype): New callback function.
(write_header_file): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 227 
 1 file changed, 227 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 4fe2748e947..48d28af6366 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2022,10 +2022,237 @@ parse_ovld ()
   return result;
 }
 
+/* Emit into FILE the opening banner comment that names the generator
+   program (pgm_path) and its two input files (bif_path and ovld_path).  */
+static void
+write_autogenerated_header (FILE *file)
+{
+  fprintf (file,
+           "/* Automatically generated by the program '%s'\n"
+           "   from the files '%s' and '%s'.  */\n\n",
+           pgm_path, bif_path, ovld_path);
+}
+
+/* Write declarations into the header file.  */
+static void
+write_decls ()
+{
+  fprintf (header_file, "enum rs6000_gen_builtins\n{\n  RS6000_BIF_NONE,\n");
+  for (int i = 0; i <= curr_bif; i++)
+fprintf (header_file, "  RS6000_BIF_%s,\n", bifs[bif_order[i]].idname);
+  fprintf (header_file, "  RS6000_BIF_MAX,\n");
+  fprintf (header_file, "  RS6000_OVLD_NONE,\n");
+  for (int i = 0; i < num_ovld_stanzas; i++)
+fprintf (header_file, "  RS6000_OVLD_%s,\n", ovld_stanzas[i].stanza_id);
+  fprintf (header_file, "  RS6000_OVLD_MAX\n};\n\n");
+
+  fprintf (header_file,
+  "extern GTY(()) tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
+  fprintf (header_file,
+  "enum rs6000_ovld_instances\n{\n  RS6000_INST_NONE,\n");
+  for (int i = 0; i <= curr_ovld; i++)
+fprintf (header_file, "  RS6000_INST_%s,\n", ovlds[i].ovld_id_name);
+  fprintf (header_file, "  RS6000_INST_MAX\n};\n\n");
+
+  fprintf (header_file, "#define MAX_OVLD_ARGS %d\n", max_ovld_args);
+
+  fprintf (header_file, "enum restriction {\n");
+  fprintf (header_file, "  RES_NONE,\n");
+  fprintf (header_file, "  RES_BITS,\n");
+  fprintf (header_file, "  RES_RANGE,\n");
+  fprintf (header_file, "  RES_VAR_RANGE,\n");
+  fprintf (header_file, "  RES_VALUES\n");
+  fprintf (header_file, "};\n\n");
+
+  fprintf (header_file, "enum bif_enable {\n");
+  fprintf (header_file, "  ENB_ALWAYS,\n");
+  fprintf (header_file, "  ENB_P5,\n");
+  fprintf (header_file, "  ENB_P6,\n");
+  fprintf (header_file, "  ENB_ALTIVEC,\n");
+  fprintf (header_file, "  ENB_CELL,\n");
+  fprintf (header_file, "  ENB_VSX,\n");
+  fprintf (header_file, "  ENB_P7,\n");
+  fprintf (header_file, "  ENB_P7_64,\n");
+  fprintf (header_file, "  ENB_P8,\n");
+  fprintf (header_file, "  ENB_P8V,\n");
+  fprintf (header_file, "  ENB_P9,\n");
+  fprintf (header_file, "  ENB_P9_64,\n");
+  fprintf (header_file, "  ENB_P9V,\n");
+  fprintf (header_file, "  ENB_IEEE128_HW,\n");
+  fprintf (header_file, "  ENB_DFP,\n");
+  fprintf (header_file, "  ENB_CRYPTO,\n");
+  fprintf (header_file, "  ENB_HTM,\n");
+  fprintf (header_file, "  ENB_P10,\n");
+  fprintf (header_file, "  ENB_P10_64,\n");
+  fprintf (header_file, "  ENB_MMA\n");
+  fprintf (header_file, "};\n\n");
+
+  fprintf (header_file, "#define PPC_MAXRESTROPNDS 3\n");
+  fprintf (header_file, "struct GTY((user)) bifdata\n");
+  fprintf (header_file, "{\n");
+  fprintf (header_file, "  const char *bifname;\n");
+  fprintf (header_file, "  bif_enable enable;\n");
+  fprintf (header_file, "  tree fntype;\n");
+  fprintf (header_file, "  insn_code icode;\n");
+  fprintf (header_file, "  int  nargs;\n");
+  fprintf (header_file, "  int  bifattrs;\n");
+  fprintf (header_file, "  int  restr_opnd[PPC_MAXRESTROPNDS];\n");
+  fprintf (header_file, "  restriction restr[PPC_MAXRESTROPNDS];\n");
+  fprintf (header_file, "  int  restr_val1[PPC_MAXRESTROPNDS];\n");
+  fprintf (header_file, "  int  restr_val2[PPC_MAXRESTROPNDS];\n");
+  fprintf (header_file, "  const char *attr_string;\n");
+  fprintf (header_file, "  rs6000_gen_builtins assoc_bif;\n");
+  fprintf (header_file, "};\n\n");
+
+  fprintf (header_file, "#define bif_init_bit\t\t(0x0001)\n");
+  fprintf (header_file, "#define bif_set_bit\t\t(0x0002)\n");
+  fprintf (header_file, "#define bif_extract_bit\t\t(0x0004)\n");
+  fprintf (header_file, "#define bif_nosoft_bit\t\t(0x0008)\n");
+  fprintf (header_file, "#define bif_ldvec_bit\t\t(0x0010)\n");
+  fprintf (header_file, "#define bif_stvec_bit\t\t(0x0020)\n");
+  fprintf (header_file, "#define bif_reve_bit\t\t(0x0040)\n");
+  fprintf (header_file, "#define bif_pred_bit\t\t(0x0080)\n");
+  fprintf (header_file, "#define bif_htm_bit\t\t(0x0100)\n");
+  fprintf (header_file, "#define bif_htmspr_bit\t\t(0x0200)\n");
+  fprintf (header_file, "#define bif_htmcr_bit\t\t(0x0400)\n");
+  fprintf (header_file, "#define bif_mma_bit\t\t(0x0800)\n");
+  fprintf (header_file, "#define 

[PATCH 17/57] rs6000: Write output to the builtin definition include file

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (write_defines_file):
Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 0358ff26414..4fe2748e947 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2040,6 +2040,23 @@ write_init_file ()
 static int
 write_defines_file ()
 {
+  /* Opening include guard and the derived _ARCH_PPC64_PWR9 macro.  */
+  fprintf (defines_file, "#ifndef _RS6000_VECDEFINES_H\n");
+  fprintf (defines_file, "#define _RS6000_VECDEFINES_H 1\n\n");
+  fprintf (defines_file, "#if defined(_ARCH_PPC64) && defined (_ARCH_PWR9)\n");
+  fprintf (defines_file, "  #define _ARCH_PPC64_PWR9 1\n");
+  fprintf (defines_file, "#endif\n\n");
+
+  /* One "#define <external name> <internal name>" per overload stanza.  */
+  for (int n = 0; n < num_ovld_stanzas; n++)
+    {
+      const ovld_stanza *stanza = &ovld_stanzas[n];
+
+      /* A stanza whose external name is "SKIP" gets no define.  */
+      if (strcmp (stanza->extern_name, "SKIP") == 0)
+        continue;
+
+      /* Wrap the define in the stanza's #ifdef condition, if it has one.  */
+      const char *guard = stanza->ifdef;
+      if (guard)
+        fprintf (defines_file, "#ifdef %s\n", guard);
+      fprintf (defines_file, "#define %s %s\n",
+               stanza->extern_name,
+               stanza->intern_name);
+      if (guard)
+        fprintf (defines_file, "#endif\n");
+    }
+
+  fprintf (defines_file, "\n#endif\n");
   return 1;
 }
 
-- 
2.27.0



[PATCH 16/57] rs6000: Build and store function type identifiers

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (complete_vector_type): New
function.
(complete_base_type): Likewise.
(construct_fntype_id): Likewise.
(parse_bif_entry): Call construct_fntype_id.
(parse_ovld_entry): Likewise.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 233 
 1 file changed, 233 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index d5deefbfd3b..0358ff26414 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -1313,6 +1313,231 @@ endian = %d.\n",
   return PC_OK;
 }
 
+/* Convert a vector type into a mode string.  The characters are stored
+   into BUF starting at index *BUFI, and *BUFI is advanced past them; no
+   terminating null is written.  The string is an optional 'b' (bool
+   vectors), then 'v', then either "p8hi" for pixel vectors or an
+   element-count-and-mode suffix chosen by TYPEPTR->base.  An unhandled
+   base type is reported and the program exits with EC_INTERR.  */
+static void
+complete_vector_type (typeinfo *typeptr, char *buf, int *bufi)
+{
+  const char *suffix = NULL;
+
+  if (typeptr->isbool)
+    buf[(*bufi)++] = 'b';
+  buf[(*bufi)++] = 'v';
+
+  if (typeptr->ispixel)
+    suffix = "p8hi";
+  else
+    {
+      switch (typeptr->base)
+        {
+        case BT_CHAR:     suffix = "16qi"; break;
+        case BT_SHORT:    suffix = "8hi";  break;
+        case BT_INT:      suffix = "4si";  break;
+        case BT_LONGLONG: suffix = "2di";  break;
+        case BT_FLOAT:    suffix = "4sf";  break;
+        case BT_DOUBLE:   suffix = "2df";  break;
+        case BT_INT128:   suffix = "1ti";  break;
+        case BT_FLOAT128: suffix = "1tf";  break;
+        case BT_VPAIR:    suffix = "1poi"; break;
+        case BT_VQUAD:    suffix = "1pxi"; break;
+        default:
+          (*diag) ("unhandled basetype %d.\n", typeptr->base);
+          exit (EC_INTERR);
+        }
+    }
+
+  /* Copy the suffix without its terminating null.  */
+  int suffix_len = (int) strlen (suffix);
+  memcpy (&buf[*bufi], suffix, suffix_len);
+  *bufi += suffix_len;
+}
+
+/* Convert a base type into a mode string.  The two-character mode for
+   TYPEPTR->base is stored into BUF at index *BUFI, and *BUFI is advanced
+   by two; no terminating null is written.  An unhandled base type is
+   reported and the program exits with EC_INTERR.  */
+static void
+complete_base_type (typeinfo *typeptr, char *buf, int *bufi)
+{
+  const char *mode = NULL;
+
+  switch (typeptr->base)
+    {
+    case BT_CHAR:       mode = "qi"; break;
+    case BT_SHORT:      mode = "hi"; break;
+    case BT_INT:        mode = "si"; break;
+    case BT_LONG:       mode = "lg"; break;
+    case BT_LONGLONG:   mode = "di"; break;
+    case BT_FLOAT:      mode = "sf"; break;
+    case BT_DOUBLE:     mode = "df"; break;
+    case BT_LONGDOUBLE: mode = "ld"; break;
+    case BT_INT128:     mode = "ti"; break;
+    case BT_FLOAT128:   mode = "tf"; break;
+    case BT_BOOL:       mode = "bi"; break;
+    case BT_STRING:     mode = "st"; break;
+    case BT_DECIMAL32:  mode = "sd"; break;
+    case BT_DECIMAL64:  mode = "dd"; break;
+    case BT_DECIMAL128: mode = "td"; break;
+    case BT_IBM128:     mode = "if"; break;
+    default:
+      (*diag) ("unhandled basetype %d.\n", typeptr->base);
+      exit (EC_INTERR);
+    }
+
+  /* Every base-type mode is exactly two characters long.  */
+  memcpy (&buf[*bufi], mode, 2);
+  *bufi += 2;
+}
+
+/* Build a function type descriptor identifier from the return type
+   and argument types described by PROTOPTR, and store it if it does
+   not already exist.  Return the identifier.  */
+static char *
+construct_fntype_id (prototype *protoptr)
+{
+  /* Determine the maximum space for a function type descriptor id.
+ Each type requires at most 9 characters (6 for the mode*, 1 for
+ the optional 'u' preceding the mode, 1 for the optional 'p'
+ preceding the mode, and 1 for an underscore following the mode).
+ We also need 5 characters for the string "ftype" that separates
+ the return mode from the argument modes.  The last argument doesn't
+ need a trailing underscore, but we count that as the one trailing
+ "ftype" instead.  For the special case of zero arguments, we need 9
+ for the return type and 7 for "ftype_v".  Finally, we need one
+ character for the terminating null.  Thus for a function with N
+ arguments, we need at most 9N+15 characters for N>0, otherwise 17.
+ 
+   *Worst case is bv16qi for "vector bool char".  */
+  int len = protoptr->nargs ? (protoptr->nargs + 1) * 9 + 6 : 17;
+  char *buf = (char *) malloc (len);
+  int bufi = 0;
+
+  if (protoptr->rettype.ispointer)
+buf[bufi++] = 'p';
+
+  if (protoptr->rettype.isvoid)
+buf[bufi++] = 'v';
+  else
+{
+  if (protoptr->rettype.isunsigned)
+   

[PATCH 15/57] rs6000: Parsing of overload input file

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-03  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (ovld_stanza): New struct.
(MAXOVLDSTANZAS): New defined constant.
(ovld_stanzas): New filescope variable.
(curr_ovld_stanza): Likewise.
(MAXOVLDS): New defined constant.
(ovlddata): New struct.
(ovlds): New filescope variable.
(curr_ovld): Likewise.
(max_ovld_args): Likewise.
(parse_ovld_entry): New function.
(parse_ovld_stanza): Likewise.
(parse_ovld): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 233 +++-
 1 file changed, 232 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 68569bda50e..d5deefbfd3b 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -405,8 +405,33 @@ static int curr_bif;
 static int *bif_order;
 static int bif_index = 0;
 
+/* Stanzas are groupings of built-in functions and overloads by some
+   common feature/attribute.  These definitions are for overload stanzas.
+
+   STANZA_ID is the identifier from the stanza header; write_decls emits
+   it as the enumerator RS6000_OVLD_<stanza_id>.  write_defines_file emits
+   "#define <extern_name> <intern_name>" for each stanza whose EXTERN_NAME
+   is not "SKIP", and wraps that line in "#ifdef <ifdef>" / "#endif" when
+   IFDEF is non-null.  */
+struct ovld_stanza {
+  char *stanza_id;
+  char *extern_name;
+  char *intern_name;
+  char *ifdef;
+};
+
+#define MAXOVLDSTANZAS 512
+static ovld_stanza ovld_stanzas[MAXOVLDSTANZAS];
 static int num_ovld_stanzas;
+static int curr_ovld_stanza;
+
+/* Data for one overload entry, and the capacity of the table of such
+   entries.  parse_ovld_entry fills these in: STANZA is the value of
+   curr_ovld_stanza when the entry was read, PROTO is the prototype from
+   the entry's first line, BIF_ID_NAME is the first identifier on the
+   second line, and OVLD_ID_NAME is the optional second identifier there
+   (the same string as BIF_ID_NAME when it is absent).  write_decls emits
+   OVLD_ID_NAME as the enumerator RS6000_INST_<ovld_id_name>.
+   NOTE(review): FNDECL is presumably the function type identifier for
+   PROTO; where it is set is not visible here -- confirm.  */
+#define MAXOVLDS 16384
+struct ovlddata {
+  int stanza;
+  prototype proto;
+  char *bif_id_name;
+  char *ovld_id_name;
+  char *fndecl;
+};
+
+static ovlddata ovlds[MAXOVLDS];
 static int num_ovlds;
+static int curr_ovld;
+static int max_ovld_args = 0;
 
 /* Exit codes for the shell.  */
 enum exit_codes {
@@ -1551,11 +1576,217 @@ create_bif_order ()
   rbt_inorder_callback (_rbt, bifo_rbt.rbt_root, set_bif_order);
 }
 
+/* Parse one two-line entry in the overload file.  The first line is a
+   prototype.  The second line holds a built-in function ID, optionally
+   followed by a separate overload ID; when the latter is absent the
+   built-in ID is used as the overload ID too.  Return PC_EOSTANZA if the
+   current line begins with '[', PC_EOFILE if there is no second line,
+   PC_PARSEFAIL after a diagnostic for any other problem, and PC_OK
+   otherwise.  */
+static parse_codes
+parse_ovld_entry ()
+{
+  /* Check for end of stanza.  */
+  pos = 0;
+  consume_whitespace ();
+  if (linebuf[pos] == '[')
+    return PC_EOSTANZA;
+
+  /* Allocate an entry in the overload table.  */
+  if (num_ovlds >= MAXOVLDS - 1)
+    {
+      (*diag) ("too many overloads.\n");
+      return PC_PARSEFAIL;
+    }
+
+  curr_ovld = num_ovlds++;
+  ovlds[curr_ovld].stanza = curr_ovld_stanza;
+
+  if (parse_prototype (&ovlds[curr_ovld].proto) == PC_PARSEFAIL)
+    return PC_PARSEFAIL;
+
+  if (ovlds[curr_ovld].proto.nargs > max_ovld_args)
+    max_ovld_args = ovlds[curr_ovld].proto.nargs;
+
+  /* Now process line 2, which just contains the builtin id and an
+     optional overload id.  */
+  if (!advance_line (ovld_file))
+    {
+      (*diag) ("unexpected EOF.\n");
+      return PC_EOFILE;
+    }
+
+  pos = 0;
+  consume_whitespace ();
+  int oldpos = pos;
+  char *id = match_identifier ();
+  ovlds[curr_ovld].bif_id_name = id;
+  ovlds[curr_ovld].ovld_id_name = id;
+  if (!id)
+    {
+      (*diag) ("missing overload id at column %d.\n", pos + 1);
+      return PC_PARSEFAIL;
+    }
+
+#ifdef DEBUG
+  (*diag) ("ID name is '%s'.\n", id);
+#endif
+
+  /* The builtin id has to match one from the bif file.
+     NOTE(review): the names of the two lookup trees used below were lost
+     from this copy of the patch and have been restored as bif_rbt and
+     ovld_rbt; confirm against the tree declarations.  */
+  if (!rbt_find (&bif_rbt, id))
+    {
+      (*diag) ("builtin ID '%s' not found in bif file.\n", id);
+      return PC_PARSEFAIL;
+    }
+
+  /* Check for an optional overload id.  Usually we use the builtin
+     function id for that purpose, but sometimes we need multiple
+     overload entries for the same builtin id, and it needs to be unique.  */
+  consume_whitespace ();
+  if (linebuf[pos] != '\n')
+    {
+      int ovld_id_pos = pos;
+      id = match_identifier ();
+      /* Anything here that is not an identifier is trailing garbage;
+         reject it rather than handing a null string to the lookup tree.  */
+      if (!id)
+        {
+          (*diag) ("garbage at end of line at column %d.\n",
+                   ovld_id_pos + 1);
+          return PC_PARSEFAIL;
+        }
+      ovlds[curr_ovld].ovld_id_name = id;
+      consume_whitespace ();
+    }
+
+  /* Save the overload ID in a lookup structure.  */
+  if (!rbt_insert (&ovld_rbt, id))
+    {
+      (*diag) ("duplicate overload ID '%s' at column %d.\n", id, oldpos + 1);
+      return PC_PARSEFAIL;
+    }
+
+  if (linebuf[pos] != '\n')
+    {
+      (*diag) ("garbage at end of line at column %d.\n", pos + 1);
+      return PC_PARSEFAIL;
+    }
+  return PC_OK;
+}
+
+/* Parse one stanza of the input overload file.  linebuf already contains the
+   first line to parse.  */
+static parse_codes
+parse_ovld_stanza ()
+{
+  /* Parse the stanza header.  */
+  pos = 0;
+  consume_whitespace ();
+
+  if (linebuf[pos] != '[')
+{
+  (*diag) ("ill-formed stanza header at column %d.\n", pos + 1);
+  return PC_PARSEFAIL;
+}
+  safe_inc_pos ();
+
+  char *stanza_name = match_identifier ();
+  if (!stanza_name)
+{
+  (*diag) ("no identifier found in stanza header.\n");
+  return PC_PARSEFAIL;
+}
+
+  /* Add the identifier to a table and set the number to be recorded
+ with subsequent overload entries.  */
+  if (num_ovld_stanzas >= MAXOVLDSTANZAS)
+{
+  (*diag) ("too many stanza headers.\n");
+  return PC_PARSEFAIL;
+}
+
+  curr_ovld_stanza = num_ovld_stanzas++;
+  ovld_stanza *stanza = _stanzas[curr_ovld_stanza];
+  stanza->stanza_id = stanza_name;
+
+  consume_whitespace ();
+  if (linebuf[pos] != ',')

[PATCH 12/57] rs6000: Parsing built-in input file, part 1 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (bif_stanza): New enum.
(curr_bif_stanza): New filescope variable.
(stanza_entry): New struct.
(stanza_map): New initialized filescope variable.
(enable_string): Likewise.
(fnkinds): New enum.
(typelist): New struct.
(attrinfo): Likewise.
(MAXRESTROPNDS): New defined constant.
(prototype): New struct.
(MAXBIFS): New defined constant.
(bifdata): New struct.
(bifs): New filescope variable.
(curr_bif): Likewise.
(bif_order): Likewise.
(bif_index): Likewise.
(stanza_name_to_stanza): New function.
(parse_bif_attrs): New stub function.
(parse_prototype): Likewise.
(parse_bif_entry): New function.
(parse_bif_stanza): Likewise.
(parse_bif): Implement.
(set_bif_order): New function.
(create_bif_order): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 363 +++-
 1 file changed, 362 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 3a6aea85847..23d80b9ee43 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -197,6 +197,99 @@ enum void_status {
   VOID_OK
 };
 
+/* Stanzas are groupings of built-in functions and overloads by some
+   common feature/attribute.  These definitions are for built-in function
+   stanzas.
+
+   NUMBIFSTANZAS is not a stanza: it comes last so that it equals the
+   number of stanzas, and it sizes the stanza_map and enable_string
+   tables.  */
+enum bif_stanza {
+  BSTZ_ALWAYS,
+  BSTZ_P5,
+  BSTZ_P6,
+  BSTZ_ALTIVEC,
+  BSTZ_CELL,
+  BSTZ_VSX,
+  BSTZ_P7,
+  BSTZ_P7_64,
+  BSTZ_P8,
+  BSTZ_P8V,
+  BSTZ_P9,
+  BSTZ_P9_64,
+  BSTZ_P9V,
+  BSTZ_IEEE128_HW,
+  BSTZ_DFP,
+  BSTZ_CRYPTO,
+  BSTZ_HTM,
+  BSTZ_P10,
+  BSTZ_P10_64,
+  BSTZ_MMA,
+  NUMBIFSTANZAS
+};
+
+static bif_stanza curr_bif_stanza;
+
+/* One row of stanza_map: a stanza name string and the bif_stanza value
+   it stands for.  */
+struct stanza_entry
+{
+  const char *stanza_name;
+  bif_stanza stanza;
+};
+
+/* Table pairing each stanza name with its bif_stanza value, one row per
+   stanza, listed in enumeration order.  */
+static stanza_entry stanza_map[NUMBIFSTANZAS] =
+  {
+{ "always",BSTZ_ALWAYS },
+{ "power5",BSTZ_P5 },
+{ "power6",BSTZ_P6 },
+{ "altivec",   BSTZ_ALTIVEC},
+{ "cell",  BSTZ_CELL   },
+{ "vsx",   BSTZ_VSX},
+{ "power7",BSTZ_P7 },
+{ "power7-64", BSTZ_P7_64  },
+{ "power8",BSTZ_P8 },
+{ "power8-vector", BSTZ_P8V},
+{ "power9",BSTZ_P9 },
+{ "power9-64", BSTZ_P9_64  },
+{ "power9-vector", BSTZ_P9V},
+{ "ieee128-hw",BSTZ_IEEE128_HW },
+{ "dfp",   BSTZ_DFP},
+{ "crypto",BSTZ_CRYPTO },
+{ "htm",   BSTZ_HTM},
+{ "power10",   BSTZ_P10},
+{ "power10-64",BSTZ_P10_64 },
+{ "mma",   BSTZ_MMA}
+  };
+
+/* Names of the ENB_* enumerators, one per stanza, listed in the same
+   order as enum bif_stanza.  The same names, in the same order, are the
+   enumerators of the "enum bif_enable" that write_decls emits.
+   NOTE(review): presumably indexed by a bif_stanza value -- confirm
+   against the users of this table.  */
+static const char *enable_string[NUMBIFSTANZAS] =
+  {
+"ENB_ALWAYS",
+"ENB_P5",
+"ENB_P6",
+"ENB_ALTIVEC",
+"ENB_CELL",
+"ENB_VSX",
+"ENB_P7",
+"ENB_P7_64",
+"ENB_P8",
+"ENB_P8V",
+"ENB_P9",
+"ENB_P9_64",
+"ENB_P9V",
+"ENB_IEEE128_HW",
+"ENB_DFP",
+"ENB_CRYPTO",
+"ENB_HTM",
+"ENB_P10",
+"ENB_P10_64",
+"ENB_MMA"
+  };
+
+/* Function modifiers provide special handling for const, pure, and fpmath
+   functions.  These are mutually exclusive, and therefore kept separate
+   from other bif attributes.
+
+   write_init_bif_table turns the kind into flag settings on the decl it
+   generates: FNK_CONST sets TREE_READONLY and TREE_NOTHROW; FNK_PURE sets
+   DECL_PURE_P and TREE_NOTHROW; FNK_FPMATH sets TREE_NOTHROW and then,
+   if flag_rounding_math, DECL_PURE_P and DECL_IS_NOVOPS, otherwise
+   TREE_READONLY.  FNK_NONE sets none of these.  */
+enum fnkinds {
+  FNK_NONE,
+  FNK_CONST,
+  FNK_PURE,
+  FNK_FPMATH
+};
+
 /* Legal base types for an argument or return type.  */
 enum basetype {
   BT_CHAR,
@@ -250,7 +343,68 @@ struct typeinfo {
   int val2;
 };
 
+/* A list of argument types.  Each node holds one argument's type in INFO
+   and links to the following argument through NEXT.  parse_args appends
+   nodes in the order the arguments are read and stores NULL in the last
+   link once the list is complete.  */
+struct typelist {
+  typeinfo info;
+  typelist *next;
+};
+
+/* Attributes of a builtin function.  Each field is a flag named after an
+   attribute keyword with an "is" prefix.  parse_bif_attrs clears the
+   whole structure and then sets a field to 1 when its keyword ("init",
+   "set", ..., "endian") appears in the attribute set being parsed.  */
+struct attrinfo {
+  char isinit;
+  char isset;
+  char isextract;
+  char isnosoft;
+  char isldvec;
+  char isstvec;
+  char isreve;
+  char ispred;
+  char ishtm;
+  char ishtmspr;
+  char ishtmcr;
+  char ismma;
+  char isquad;
+  char ispair;
+  char isno32bit;
+  char is32bit;
+  char iscpu;
+  char isldstmask;
+  char islxvrse;
+  char islxvrze;
+  char isendian;
+};
+
+/* Fields associated with a function prototype (bif or overload).
+   parse_prototype fills in RETTYPE and BIFNAME; parse_args fills in
+   NARGS, the ARGS list, and the four restriction arrays.  Slot K of those
+   arrays describes the K-th restricted argument: RESTR_OPND[K] is its
+   1-based position in the argument list, RESTR[K] its restriction, and
+   RESTR_VAL1[K] / RESTR_VAL2[K] the restriction's two values.
+   MAXRESTROPNDS has the same value as the PPC_MAXRESTROPNDS that
+   write_decls emits into the generated header.  */
+#define MAXRESTROPNDS 3
+struct prototype {
+  typeinfo rettype;
+  char *bifname;
+  int nargs;
+  typelist *args;
+  int restr_opnd[MAXRESTROPNDS];
+  restriction restr[MAXRESTROPNDS];
+  int restr_val1[MAXRESTROPNDS];
+  int restr_val2[MAXRESTROPNDS];
+};
+
+/* Data associated with a builtin function, and a table of such data.
+   KIND selects the const/pure/fpmath flag settings, PROTO is the parsed
+   prototype, and ATTRS holds the parsed attribute set.  IDNAME is emitted
+   as the enumerator RS6000_BIF_<idname>, and FNDECL is emitted by
+   write_init_bif_table as the expression that gives the function type.
+   NOTE(review): STANZA is presumably a bif_stanza value and PATNAME the
+   name of an insn pattern; neither use is visible here -- confirm.  */
+#define MAXBIFS 16384
+struct bifdata {
+  int stanza;
+  fnkinds kind;
+  prototype proto;
+  char *idname;
+  char *patname;
+  attrinfo attrs;
+  char *fndecl;
+};
+
+static bifdata bifs[MAXBIFS];
 static int num_bifs;
+static int curr_bif;
+static int *bif_order;
+static int bif_index = 0;
+

[PATCH 14/57] rs6000: Parsing built-in input file, part 3 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-24  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (parse_bif_attrs):
Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 102 
 1 file changed, 102 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 0f0f8eaecf2..68569bda50e 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -1183,6 +1183,108 @@ base = %d, restr = %d, val1 = %d, val2 = %d, pos = 
%d.\n",
 static parse_codes
 parse_bif_attrs (attrinfo *attrptr)
 {
+  /* The attribute set must open with '{'.  */
+  consume_whitespace ();
+  if (linebuf[pos] != '{')
+    {
+      (*diag) ("missing attribute set at column %d.\n", pos + 1);
+      return PC_PARSEFAIL;
+    }
+  safe_inc_pos ();
+
+  memset (attrptr, 0, sizeof (*attrptr));
+
+  /* Each recognized keyword, paired with the flag in *ATTRPTR it sets.  */
+  const struct
+  {
+    const char *keyword;
+    char *flag;
+  } known[] =
+    {
+      { "init",     &attrptr->isinit },
+      { "set",      &attrptr->isset },
+      { "extract",  &attrptr->isextract },
+      { "nosoft",   &attrptr->isnosoft },
+      { "ldvec",    &attrptr->isldvec },
+      { "stvec",    &attrptr->isstvec },
+      { "reve",     &attrptr->isreve },
+      { "pred",     &attrptr->ispred },
+      { "htm",      &attrptr->ishtm },
+      { "htmspr",   &attrptr->ishtmspr },
+      { "htmcr",    &attrptr->ishtmcr },
+      { "mma",      &attrptr->ismma },
+      { "quad",     &attrptr->isquad },
+      { "pair",     &attrptr->ispair },
+      { "no32bit",  &attrptr->isno32bit },
+      { "32bit",    &attrptr->is32bit },
+      { "cpu",      &attrptr->iscpu },
+      { "ldstmask", &attrptr->isldstmask },
+      { "lxvrse",   &attrptr->islxvrse },
+      { "lxvrze",   &attrptr->islxvrze },
+      { "endian",   &attrptr->isendian }
+    };
+  const int num_known = sizeof (known) / sizeof (known[0]);
+
+  for (;;)
+    {
+      consume_whitespace ();
+      int start = pos;
+      char *word = match_identifier ();
+
+      /* No identifier: the set has to end here with '}'.  */
+      if (!word)
+        {
+          pos = start;
+          if (linebuf[pos] != '}')
+            {
+              (*diag) ("badly terminated attr set at column %d.\n", pos + 1);
+              return PC_PARSEFAIL;
+            }
+          safe_inc_pos ();
+          break;
+        }
+
+      /* Look the keyword up and set its flag.  */
+      int k = 0;
+      while (k < num_known && strcmp (word, known[k].keyword) != 0)
+        k++;
+      if (k == num_known)
+        {
+          (*diag) ("unknown attribute at column %d.\n", start + 1);
+          return PC_PARSEFAIL;
+        }
+      *known[k].flag = 1;
+
+      /* A ',' is consumed here; a '}' is left for the next iteration.  */
+      consume_whitespace ();
+      if (linebuf[pos] == ',')
+        safe_inc_pos ();
+      else if (linebuf[pos] != '}')
+        {
+          (*diag) ("arg not followed by ',' or '}' at column %d.\n",
+                   pos + 1);
+          return PC_PARSEFAIL;
+        }
+    }
+
+#ifdef DEBUG
+  (*diag) ("attribute set: init = %d, set = %d, extract = %d, \
+nosoft = %d, ldvec = %d, stvec = %d, reve = %d, pred = %d, htm = %d, \
+htmspr = %d, htmcr = %d, mma = %d, quad = %d, pair = %d, no32bit = %d, \
+32bit = %d, cpu = %d, ldstmask = %d, lxvrse = %d, lxvrze = %d, \
+endian = %d.\n",
+           attrptr->isinit, attrptr->isset, attrptr->isextract,
+           attrptr->isnosoft, attrptr->isldvec, attrptr->isstvec,
+           attrptr->isreve, attrptr->ispred, attrptr->ishtm, attrptr->ishtmspr,
+           attrptr->ishtmcr, attrptr->ismma, attrptr->isquad, attrptr->ispair,
+           attrptr->isno32bit, attrptr->is32bit, attrptr->iscpu,
+           attrptr->isldstmask, attrptr->islxvrse, attrptr->islxvrze,
+           attrptr->isendian);
+#endif
+
   return PC_OK;
 }
 
-- 
2.27.0



[PATCH 10/57] rs6000: Red-black tree implementation for balanced tree search

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-03  Bill Schmidt  

gcc/
* config/rs6000/rbtree.c: New file.
* config/rs6000/rbtree.h: New file.
---
 gcc/config/rs6000/rbtree.c | 233 +
 gcc/config/rs6000/rbtree.h |  51 
 2 files changed, 284 insertions(+)
 create mode 100644 gcc/config/rs6000/rbtree.c
 create mode 100644 gcc/config/rs6000/rbtree.h

diff --git a/gcc/config/rs6000/rbtree.c b/gcc/config/rs6000/rbtree.c
new file mode 100644
index 000..14d5cfa3dc9
--- /dev/null
+++ b/gcc/config/rs6000/rbtree.c
@@ -0,0 +1,233 @@
+/* Partial red-black tree implementation for rs6000-gen-builtins.c.
+   Copyright (C) 2020-21 Free Software Foundation, Inc.
+   Contributed by Bill Schmidt, IBM 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "rbtree.h"
+
+/* Allocate and initialize a node holding STR, ready to be inserted into
+   tree T.  The node starts out red, with both children set to T's nil
+   sentinel and no parent; the caller is responsible for linking it into
+   the tree.  STR is stored by pointer, not copied.  Aborts if no memory
+   is available, rather than writing through a null pointer.  */
+static struct rbt_string_node *
+rbt_create_node (struct rbt_strings *t, char *str)
+{
+  struct rbt_string_node *nodeptr
+    = (struct rbt_string_node *) malloc (sizeof *nodeptr);
+  if (!nodeptr)
+    abort ();
+
+  nodeptr->str = str;
+  nodeptr->left = t->rbt_nil;
+  nodeptr->right = t->rbt_nil;
+  nodeptr->par = NULL;
+  nodeptr->color = RBT_RED;
+  return nodeptr;
+}
+
+/* Left-rotate the subtree of T rooted at NODE: NODE's right child takes
+   over NODE's position, and NODE becomes that child's left child.  */
+static void
+rbt_left_rotate (struct rbt_strings *t, struct rbt_string_node *node)
+{
+  struct rbt_string_node *pivot = node->right;
+  assert (pivot);
+
+  /* PIVOT's left subtree is adopted as NODE's right subtree.  */
+  struct rbt_string_node *inner = pivot->left;
+  node->right = inner;
+  if (inner != t->rbt_nil)
+    inner->par = node;
+
+  /* PIVOT replaces NODE beneath NODE's former parent, or becomes the
+     root when NODE had none.  */
+  struct rbt_string_node *parent = node->par;
+  pivot->par = parent;
+  if (parent == t->rbt_nil)
+    t->rbt_root = pivot;
+  else if (parent->left == node)
+    parent->left = pivot;
+  else
+    parent->right = pivot;
+
+  /* Finally, NODE hangs off PIVOT's left.  */
+  pivot->left = node;
+  node->par = pivot;
+}
+
+/* Right-rotate the subtree of T rooted at NODE: NODE's left child takes
+   over NODE's position, and NODE becomes that child's right child.  */
+static void
+rbt_right_rotate (struct rbt_strings *t, struct rbt_string_node *node)
+{
+  struct rbt_string_node *pivot = node->left;
+  assert (pivot);
+
+  /* PIVOT's right subtree is adopted as NODE's left subtree.  */
+  struct rbt_string_node *inner = pivot->right;
+  node->left = inner;
+  if (inner != t->rbt_nil)
+    inner->par = node;
+
+  /* PIVOT replaces NODE beneath NODE's former parent, or becomes the
+     root when NODE had none.  */
+  struct rbt_string_node *parent = node->par;
+  pivot->par = parent;
+  if (parent == t->rbt_nil)
+    t->rbt_root = pivot;
+  else if (parent->right == node)
+    parent->right = pivot;
+  else
+    parent->left = pivot;
+
+  /* Finally, NODE hangs off PIVOT's right.  */
+  pivot->right = node;
+  node->par = pivot;
+}
+
+/* Insert STR into the tree, returning 1 for success and 0 if STR already
+   appears in the tree.  */
+int
+rbt_insert (struct rbt_strings *t, char *str)
+{
+  struct rbt_string_node *curr = t->rbt_root;
+  struct rbt_string_node *trail = t->rbt_nil;
+
+  while (curr != t->rbt_nil)
+{
+  trail = curr;
+  int cmp = strcmp (str, curr->str);
+  if (cmp < 0)
+   curr = curr->left;
+  else if (cmp > 0)
+   curr = curr->right;
+  else
+   return 0;
+}
+
+  struct rbt_string_node *fresh = rbt_create_node (t, str);
+  fresh->par = trail;
+
+  if (trail == t->rbt_nil)
+t->rbt_root = fresh;
+  else if (strcmp (fresh->str, trail->str) < 0)
+trail->left = fresh;
+  else
+trail->right = fresh;
+
+  fresh->left = t->rbt_nil;
+  fresh->right = t->rbt_nil;
+
+  /* FRESH has now been inserted as a red leaf.  If we have invalidated
+ one of the following preconditions, we must fix things up:
+  (a) If a node is red, both of its children are black.
+  (b) The root must be black.
+ Note that only (a) or (b) applies at any given time during the
+ process.  This algorithm works up the tree from NEW looking
+ for a red child with a red parent, and cleaning that up.  If the
+ root ends up red, it gets turned black at the end.  */
+  curr = fresh;
+  while (curr->par->color == RBT_RED)
+if (curr->par == curr->par->par->left)
+  {
+   struct rbt_string_node *uncle = curr->par->par->right;
+   if (uncle->color == RBT_RED)
+ {
+   curr->par->color = RBT_BLACK;
+   uncle->color = RBT_BLACK;
+   curr->par->par->color = RBT_RED;
+   curr = curr->par->par;
+ 

[PATCH 11/57] rs6000: Main function with stubs for parsing and output

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-03  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (rbtree.h): New #include.
(num_bifs): New filescope variable.
(num_ovld_stanzas): Likewise.
(num_ovlds): Likewise.
(exit_codes): Add more enum values.
(parse_codes): New enum.
(bif_rbt): New filescope variable.
(ovld_rbt): Likewise.
(fntype_rbt): Likewise.
(bifo_rbt): Likewise.
(parse_bif): New stub function.
(create_bif_order): Likewise.
(parse_ovld): Likewise.
(write_header_file): Likewise.
(write_init_file): Likewise.
(write_defines_file): Likewise.
(delete_output_files): New function.
(main): Likewise.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 232 
 1 file changed, 232 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 1c4bf20bf3b..3a6aea85847 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -163,6 +163,7 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 #include 
+#include "rbtree.h"
 
 /* Used as a sentinel for range constraints on integer fields.  No field can
be 32 bits wide, so this is a safe sentinel value.  */
@@ -249,11 +250,45 @@ struct typeinfo {
   int val2;
 };
 
+static int num_bifs;
+static int num_ovld_stanzas;
+static int num_ovlds;
+
 /* Exit codes for the shell.  */
 enum exit_codes {
+  EC_OK,
+  EC_BADARGS,
+  EC_NOBIF,
+  EC_NOOVLD,
+  EC_NOHDR,
+  EC_NOINIT,
+  EC_NODEFINES,
+  EC_PARSEBIF,
+  EC_PARSEOVLD,
+  EC_WRITEHDR,
+  EC_WRITEINIT,
+  EC_WRITEDEFINES,
   EC_INTERR
 };
 
+/* Return codes for parsing routines.  */
+enum parse_codes {
+  PC_OK,
+  PC_EOFILE,
+  PC_EOSTANZA,
+  PC_PARSEFAIL
+};
+
+/* The red-black trees for built-in function identifiers, built-in
+   overload identifiers, and function type descriptors.  */
+static rbt_strings bif_rbt;
+static rbt_strings ovld_rbt;
+static rbt_strings fntype_rbt;
+
+/* Another red-black tree containing a mapping from built-in function
+   identifiers to the order in which they were encountered.  */
+static rbt_strings bifo_rbt;
+
 /* Pointer to a diagnostic function.  */
 void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
   = NULL;
@@ -895,3 +930,200 @@ match_type (typeinfo *typedata, int voidok)
 
   return 1;
 }
+
+/* Parse the built-in file.  Stub for now: no input is read and PC_OK is
+   returned unconditionally.  */
+static parse_codes
+parse_bif ()
+{
+  return PC_OK;
+}
+
+/* Create a mapping from function IDs in their final order to the order
+   they appear in the built-in function file.  Stub for now: the body is
+   empty, so no mapping is built.  */
+static void
+create_bif_order ()
+{
+}
+
+/* Parse the overload file.  Stub for now: no input is read and PC_OK is
+   returned unconditionally.  */
+static parse_codes
+parse_ovld ()
+{
+  return PC_OK;
+}
+
+/* Write everything to the header file (rs6000-builtins.h).  Stub for
+   now: nothing is written and 1 is returned unconditionally.  */
+static int
+write_header_file ()
+{
+  return 1;
+}
+
+/* Write everything to the initialization file (rs6000-builtins.c).  Stub
+   for now: nothing is written and 1 is returned unconditionally.  */
+static int
+write_init_file ()
+{
+  return 1;
+}
+
+/* Write everything to the include file (rs6000-vecdefines.h).  Stub for
+   now: nothing is written and 1 is returned unconditionally.  */
+static int
+write_defines_file ()
+{
+  return 1;
+}
+
+/* Close and delete output files after any failure, so that subsequent
+   build dependencies will fail.  Null streams and null paths are
+   skipped, and each stream pointer is reset to null once it has been
+   closed, so calling this function more than once is harmless.  */
+static void
+delete_output_files ()
+{
+  /* Depending on whence we're called, some of these may already be
+     closed.  Handing a null or already-closed stream to fclose is
+     undefined, so a caller that closes one of these streams early
+     should reset its pointer to null.  Errors from fclose and unlink
+     are deliberately ignored.  */
+  if (header_file)
+    {
+      fclose (header_file);
+      header_file = NULL;
+    }
+  if (init_file)
+    {
+      fclose (init_file);
+      init_file = NULL;
+    }
+  if (defines_file)
+    {
+      fclose (defines_file);
+      defines_file = NULL;
+    }
+
+  if (header_path)
+    unlink (header_path);
+  if (init_path)
+    unlink (init_path);
+  if (defines_path)
+    unlink (defines_path);
+}
+
+/* Main program to convert flat files into built-in initialization code.  */
+int
+main (int argc, const char **argv)
+{
+  if (argc != 6)
+{
+  fprintf (stderr,
+  "Five arguments required: two input file and three output "
+  "files.\n");
+  exit (EC_BADARGS);
+}
+
+  pgm_path = argv[0];
+  bif_path = argv[1];
+  ovld_path = argv[2];
+  header_path = argv[3];
+  init_path = argv[4];
+  defines_path = argv[5];
+
+  bif_file = fopen (bif_path, "r");
+  if (!bif_file)
+{
+  fprintf (stderr, "Cannot find input built-in file '%s'.\n", bif_path);
+  exit (EC_NOBIF);
+}
+  ovld_file = fopen (ovld_path, "r");
+  if (!ovld_file)
+{
+  fprintf (stderr, "Cannot find input overload file '%s'.\n", ovld_path);
+  exit (EC_NOOVLD);
+}
+  header_file = fopen (header_path, "w");
+  if (!header_file)
+{
+  fprintf (stderr, "Cannot open header file '%s' for output.\n",
+  header_path);
+  exit (EC_NOHDR);
+}
+  init_file = fopen (init_path, "w");
+  if (!init_file)
+{
+  fprintf (stderr, "Cannot open init file '%s' for output.\n", init_path);
+  exit (EC_NOINIT);
+}
+  defines_file = fopen (defines_path, "w");
+  if (!defines_file)
+{
+  fprintf (stderr, "Cannot open defines file '%s' for output.\n",
+  defines_path);
+  exit (EC_NODEFINES);
+}
+
+  /* Initialize 

[PATCH 09/57] rs6000: Add functions for matching types, part 3 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-24  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (restriction): New enum.
(typeinfo): Add restr field.
(match_const_restriction): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 136 
 1 file changed, 136 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 96e74e6048a..1c4bf20bf3b 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -218,6 +218,21 @@ enum basetype {
   BT_VQUAD
 };
 
+/* Ways in which a const int value can be restricted.  RES_BITS indicates
+   that the integer is restricted to val1 bits, interpreted as an unsigned
+   number.  RES_RANGE indicates that the integer is restricted to values
+   between val1 and val2, inclusive.  RES_VAR_RANGE is like RES_RANGE, but
+   the argument may be variable, so it can only be checked if it is constant.
+   RES_VALUES indicates that the integer must have one of the values val1
+   or val2.  */
+enum restriction {
+  RES_NONE,
+  RES_BITS,
+  RES_RANGE,
+  RES_VAR_RANGE,
+  RES_VALUES
+};
+
 /* Type modifiers for an argument or return type.  */
 struct typeinfo {
   char isvoid;
@@ -229,6 +244,7 @@ struct typeinfo {
   char ispixel;
   char ispointer;
   basetype base;
+  restriction restr;
   int val1;
   int val2;
 };
@@ -477,6 +493,126 @@ match_basetype (typeinfo *typedata)
 static int
 match_const_restriction (typeinfo *typedata)
 {
+  /* Parse a restriction starting at linebuf[pos], which must be one of
+     '<', '{' or '['.  On success the restriction kind and its value(s)
+     are stored in TYPEDATA, POS is left just past the closing
+     delimiter, and 1 is returned.  On failure a diagnostic is issued
+     and 0 is returned.
+
+     The opening delimiter selects both the kind of a two-value
+     restriction and the delimiter that must close it.  */
+  const char opener = linebuf[pos];
+  restriction pair_kind;
+  char closer;
+  if (opener == '<')
+    {
+      pair_kind = RES_RANGE;
+      closer = '>';
+    }
+  else if (opener == '{')
+    {
+      pair_kind = RES_VALUES;
+      closer = '}';
+    }
+  else
+    {
+      assert (opener == '[');
+      pair_kind = RES_VAR_RANGE;
+      closer = ']';
+    }
+  safe_inc_pos ();
+
+  /* First (and, for "<x>", only) integer.  */
+  int oldpos = pos;
+  int x = match_integer ();
+  if (x == MININT)
+    {
+      (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+      return 0;
+    }
+  consume_whitespace ();
+
+  /* "<x>" is the single-value form: X is a width in bits.  */
+  if (opener == '<' && linebuf[pos] == '>')
+    {
+      typedata->restr = RES_BITS;
+      typedata->val1 = x;
+      safe_inc_pos ();
+      return 1;
+    }
+
+  /* Every other form requires a comma next.  */
+  if (linebuf[pos] != ',')
+    {
+      if (opener == '<')
+        (*diag) ("malformed restriction at column %d.\n", pos + 1);
+      else
+        (*diag) ("missing comma at column %d.\n", pos + 1);
+      return 0;
+    }
+  safe_inc_pos ();
+
+  /* Second integer.  Whitespace after the comma is skipped for all
+     three forms; the "<x,y>" form used to reject "<x, y>" while the
+     "{x, y}" and "[x, y]" forms accepted the equivalent spelling.  */
+  consume_whitespace ();
+  oldpos = pos;
+  int y = match_integer ();
+  if (y == MININT)
+    {
+      (*diag) ("malformed integer at column %d.\n", oldpos + 1);
+      return 0;
+    }
+  typedata->restr = pair_kind;
+  typedata->val1 = x;
+  typedata->val2 = y;
+
+  consume_whitespace ();
+  if (linebuf[pos] != closer)
+    {
+      (*diag) ("malformed restriction at column %d.\n", pos + 1);
+      return 0;
+    }
+  safe_inc_pos ();
+
   return 1;
 }
 
-- 
2.27.0



[PATCH 08/57] rs6000: Add functions for matching types, part 2 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (match_basetype): Implement.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 64 +
 1 file changed, 64 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index ac061d092e7..96e74e6048a 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -393,6 +393,70 @@ handle_pointer (typeinfo *typedata)
 static int
 match_basetype (typeinfo *typedata)
 {
+  /* Base types spelled with a single token.  "long" is handled
+     separately below because it may be followed by a second "long" or
+     by "double".  A "string" is a special "const char *" -- we need it
+     because it cannot match either signed or unsigned char *.  */
+  static const struct
+  {
+    const char *spelling;
+    basetype base;
+  } one_word[] = {
+    { "char", BT_CHAR },
+    { "short", BT_SHORT },
+    { "int", BT_INT },
+    { "float", BT_FLOAT },
+    { "double", BT_DOUBLE },
+    { "__int128", BT_INT128 },
+    { "_Float128", BT_FLOAT128 },
+    { "bool", BT_BOOL },
+    { "string", BT_STRING },
+    { "_Decimal32", BT_DECIMAL32 },
+    { "_Decimal64", BT_DECIMAL64 },
+    { "_Decimal128", BT_DECIMAL128 },
+    { "__ibm128", BT_IBM128 }
+  };
+  const unsigned int n_one_word = sizeof one_word / sizeof one_word[0];
+
+  consume_whitespace ();
+  int startcol = pos;
+  char *token = match_identifier ();
+  if (!token)
+    {
+      (*diag) ("missing base type in return type at column %d\n", pos + 1);
+      return 0;
+    }
+
+  if (!strcmp (token, "long"))
+    {
+      consume_whitespace ();
+      int resume = pos;
+      char *follower = match_identifier ();
+      if (follower && !strcmp (follower, "long"))
+        typedata->base = BT_LONGLONG;
+      else if (follower && !strcmp (follower, "double"))
+        typedata->base = BT_LONGDOUBLE;
+      else
+        {
+          /* Plain "long".  Any identifier just read belongs to whatever
+             follows the type (this occurs when "long" is a return type
+             and the next token is the function name), so push it
+             back.  */
+          typedata->base = BT_LONG;
+          pos = resume;
+        }
+    }
+  else
+    {
+      unsigned int i = 0;
+      while (i < n_one_word && strcmp (token, one_word[i].spelling) != 0)
+        i++;
+
+      if (i == n_one_word)
+        {
+          (*diag) ("unrecognized base type at column %d\n", startcol + 1);
+          return 0;
+        }
+      typedata->base = one_word[i].base;
+    }
+
+  handle_pointer (typedata);
   return 1;
 }
 
-- 
2.27.0



[PATCH 07/57] rs6000: Add functions for matching types, part 1 of 3

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (void_status): New enum.
(basetype): Likewise.
(typeinfo): New struct.
(handle_pointer): New function.
(match_basetype): New stub function.
(match_const_restriction): Likewise.
(match_type): New function.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 364 
 1 file changed, 364 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index f3e1d31c225..ac061d092e7 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -190,6 +190,49 @@ static char linebuf[LINELEN];
 static int line;
 static int pos;
 
+/* Used to determine whether a type can be void (only return types).  */
+enum void_status {
+  VOID_NOTOK,
+  VOID_OK
+};
+
+/* Legal base types for an argument or return type.  */
+enum basetype {
+  BT_CHAR,
+  BT_SHORT,
+  BT_INT,
+  BT_LONG,
+  BT_LONGLONG,
+  BT_FLOAT,
+  BT_DOUBLE,
+  BT_LONGDOUBLE,
+  BT_INT128,
+  BT_FLOAT128,
+  BT_BOOL,
+  BT_STRING,
+  BT_DECIMAL32,
+  BT_DECIMAL64,
+  BT_DECIMAL128,
+  BT_IBM128,
+  BT_VPAIR,
+  BT_VQUAD
+};
+
+/* Type modifiers for an argument or return type.  */
+struct typeinfo {
+  char isvoid;
+  char isconst;
+  char isvector;
+  char issigned;
+  char isunsigned;
+  char isbool;
+  char ispixel;
+  char ispointer;
+  basetype base;
+  int val1;
+  int val2;
+};
+
 /* Exit codes for the shell.  */
 enum exit_codes {
   EC_INTERR
@@ -331,3 +374,324 @@ match_to_right_bracket ()
   pos = lastpos + 1;
   return buf;
 }
+
+/* Skip whitespace; if a '*' follows, consume it and mark TYPEDATA as a
+   pointer type.  Without a '*', only the whitespace is consumed.  */
+static inline void
+handle_pointer (typeinfo *typedata)
+{
+  consume_whitespace ();
+  if (linebuf[pos] != '*')
+    return;
+
+  typedata->ispointer = 1;
+  safe_inc_pos ();
+}
+
+/* Match one of the allowable base types.  Consumes one token unless the
+   token is "long", which may be followed by a second "long" or by
+   "double" to form "long long" or "long double".  Optionally consumes a
+   following '*' token for pointers.  Return 1 for success, 0 for
+   failure.  */
+static int
+match_basetype (typeinfo *typedata)
+{
+  return 1;
+}
+
+/* A const int argument may be restricted to certain values.  This is
+   indicated by one of the following occurring after the "int" token:
+
+     <x>   restricts the constant to x bits, interpreted as unsigned
+     <x,y> restricts the constant to the inclusive range [x,y]
+     [x,y] restricts the constant to the inclusive range [x,y],
+           but only applies if the argument is constant.
+     {x,y} restricts the constant to one of two values, x or y.
+
+   Here x and y are integer tokens.  Note that the "const" token is a
+   lie when the restriction is [x,y], but this simplifies the parsing
+   significantly and is hopefully forgivable.
+
+   Return 1 for success, else 0.  */
+static int
+match_const_restriction (typeinfo *typedata)
+{
+  return 1;
+}
+
+/* Look for a type, which can be terminated by a token that is not part of
+   a type, a comma, or a closing parenthesis.  Place information about the
+   type in TYPEDATA.  Return 1 for success, 0 for failure.  */
+static int
+match_type (typeinfo *typedata, int voidok)
+{
+  /* A legal type is of the form:
+
+       [const] [[signed|unsigned] <basetype> | <vectype>] [*]
+
+     Legal values of <basetype> are (for now):
+
+   char
+   short
+   int
+   long
+   long double
+   long long
+   float
+   double
+   __int128
+   _Float128
+   bool
+   string
+   _Decimal32
+   _Decimal64
+   _Decimal128
+   __ibm128
+
+     Legal values of <vectype> are as follows, and are shorthand for
+     the associated meaning:
+
+       vsc     vector signed char
+       vuc     vector unsigned char
+       vbc     vector bool char
+       vss     vector signed short
+       vus     vector unsigned short
+       vbs     vector bool short
+       vsi     vector signed int
+       vui     vector unsigned int
+       vbi     vector bool int
+       vsll    vector signed long long
+       vull    vector unsigned long long
+       vbll    vector bool long long
+       vsq     vector signed __int128
+       vuq     vector unsigned __int128
+       vbq     vector bool __int128
+       vp      vector pixel
+       vf      vector float
+       vd      vector double
+       v256    __vector_pair
+       v512    __vector_quad
+
+     For simplicity, we don't support "short int" and "long long int".
+     We don't currently support a <basetype> of "_Float16".  "signed"
+     and "unsigned" only apply to integral base types.  The optional *
+     indicates a pointer type.  */
+
+  consume_whitespace ();
+  memset (typedata, 0, sizeof(*typedata));
+  int oldpos = pos;
+
+  char *token = match_identifier ();
+  if (!token)
+return 0;
+
+  if (!strcmp (token, "const"))
+{
+  typedata->isconst = 1;
+  consume_whitespace ();
+  oldpos = pos;
+  token = match_identifier ();
+}
+
+  if (!strcmp (token, "void"))
+typedata->isvoid = 1;
+

[PATCH 06/57] rs6000: Add helper functions for parsing

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-03  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c (MININT): New defined
constant.
(exit_codes): New enum.
(consume_whitespace): New function.
(advance_line): Likewise.
(safe_inc_pos): Likewise.
(match_identifier): Likewise.
(match_integer): Likewise.
(match_to_right_bracket): Likewise.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 121 
 1 file changed, 121 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
index 0e8b315208b..f3e1d31c225 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -164,6 +164,10 @@ along with GCC; see the file COPYING3.  If not see
 #include 
 #include 
 
+/* Used as a sentinel for range constraints on integer fields.  No field can
+   be 32 bits wide, so this is a safe sentinel value.  */
+#define MININT INT32_MIN
+
 /* Input and output file descriptors and pathnames.  */
 static FILE *bif_file;
 static FILE *ovld_file;
@@ -186,6 +190,11 @@ static char linebuf[LINELEN];
 static int line;
 static int pos;
 
+/* Exit codes for the shell.  */
+enum exit_codes {
+  EC_INTERR
+};
+
 /* Pointer to a diagnostic function.  */
 void (*diag) (const char *, ...) __attribute__ ((format (printf, 1, 2)))
   = NULL;
@@ -210,3 +219,115 @@ ovld_diag (const char * fmt, ...)
   vfprintf (stderr, fmt, args);
   va_end (args);
 }
+
+/* Advance POS past whitespace on the current line.  The scan stops at a
+   newline (which is never consumed), at the first non-whitespace
+   character, or at the end of the line buffer.  */
+static void
+consume_whitespace ()
+{
+  /* The cast keeps the argument within the range the <ctype.h>
+     functions accept; passing a negative plain char is undefined.  */
+  while (pos < LINELEN
+         && linebuf[pos] != '\n'
+         && isspace ((unsigned char) linebuf[pos]))
+    pos++;
+}
+
+/* Read lines from FILE into LINEBUF until one is found that is neither
+   blank nor a comment (first non-whitespace character ';').  LINE is
+   incremented for every line read, and POS is left at the first
+   non-whitespace character of the line.  Return 1 when such a line is
+   found, 0 once fgets has nothing more to deliver.  */
+static int
+advance_line (FILE *file)
+{
+  while (fgets (linebuf, sizeof linebuf, file))
+    {
+      line++;
+      pos = 0;
+      consume_whitespace ();
+
+      char first = linebuf[pos];
+      if (first != '\n' && first != ';')
+        return 1;
+    }
+
+  return 0;
+}
+
+/* Advance POS by one character.  If that would move POS outside
+   LINEBUF, report the overrun and exit with EC_INTERR.  */
+static inline void
+safe_inc_pos ()
+{
+  /* Test the incremented value: the valid indices are 0 .. LINELEN - 1,
+     so POS must never be left equal to LINELEN.  */
+  if (++pos >= LINELEN)
+    {
+      (*diag) ("line length overrun.\n");
+      exit (EC_INTERR);
+    }
+}
+
+/* Match an identifier (a run of alphanumeric characters and
+   underscores) starting at POS.  Return NULL, leaving POS unchanged, if
+   there is none; otherwise advance POS past the identifier and return a
+   newly allocated, NUL-terminated copy of it.  Exits with EC_INTERR on
+   line-buffer overrun or when no memory is available.  */
+static char *
+match_identifier ()
+{
+  /* The cast keeps the argument within the range the <ctype.h>
+     functions accept; passing a negative plain char is undefined.  */
+  int lastpos = pos - 1;
+  while (isalnum ((unsigned char) linebuf[lastpos + 1])
+         || linebuf[lastpos + 1] == '_')
+    if (++lastpos >= LINELEN - 1)
+      {
+        (*diag) ("line length overrun.\n");
+        exit (EC_INTERR);
+      }
+
+  if (lastpos < pos)
+    return NULL;
+
+  int len = lastpos - pos + 1;
+  char *buf = (char *) malloc (len + 1);
+  if (!buf)
+    {
+      (*diag) ("out of memory.\n");
+      exit (EC_INTERR);
+    }
+  memcpy (buf, &linebuf[pos], len);
+  buf[len] = '\0';
+
+  pos = lastpos + 1;
+  return buf;
+}
+
+/* Match an optionally negative decimal integer starting at POS and
+   return its value, advancing POS past it.  Return MININT if no digits
+   are found; in that case a leading '-' has still been consumed.  */
+static int
+match_integer ()
+{
+  int startpos = pos;
+  if (linebuf[pos] == '-')
+    safe_inc_pos ();
+
+  /* The cast keeps the argument within the range the <ctype.h>
+     functions accept; passing a negative plain char is undefined.  */
+  int lastpos = pos - 1;
+  while (isdigit ((unsigned char) linebuf[lastpos + 1]))
+    if (++lastpos >= LINELEN - 1)
+      {
+        (*diag) ("line length overrun in match_integer.\n");
+        exit (EC_INTERR);
+      }
+
+  if (lastpos < pos)
+    return MININT;
+
+  pos = lastpos + 1;
+
+  /* Convert in place.  The digit run ends at a non-digit inside the
+     line buffer, so sscanf stops exactly where the scan above did and
+     no temporary copy (which used to be leaked) is needed.  */
+  int x;
+  if (sscanf (&linebuf[startpos], "%d", &x) != 1)
+    return MININT;
+  return x;
+}
+
+/* Match everything from POS up to, but not including, the next ']' on
+   the current line.  Return NULL, leaving POS unchanged, if the ']' is
+   at POS itself; otherwise leave POS at the ']' and return a newly
+   allocated, NUL-terminated copy of the matched text.  Exits with
+   EC_INTERR if the line ends before a ']' is found, on line-buffer
+   overrun, or when no memory is available.  */
+static const char *
+match_to_right_bracket ()
+{
+  int lastpos = pos - 1;
+  while (linebuf[lastpos + 1] != ']')
+    {
+      /* Never scan beyond the end of the current line: what follows the
+         terminator may be left over from an earlier, longer line.  */
+      if (linebuf[lastpos + 1] == '\n' || linebuf[lastpos + 1] == '\0')
+        {
+          (*diag) ("no ']' found before end of line.\n");
+          exit (EC_INTERR);
+        }
+      if (++lastpos >= LINELEN - 1)
+        {
+          (*diag) ("line length overrun.\n");
+          exit (EC_INTERR);
+        }
+    }
+
+  if (lastpos < pos)
+    return NULL;
+
+  int len = lastpos - pos + 1;
+  char *buf = (char *) malloc (len + 1);
+  if (!buf)
+    {
+      (*diag) ("out of memory.\n");
+      exit (EC_INTERR);
+    }
+  memcpy (buf, &linebuf[pos], len);
+  buf[len] = '\0';
+
+  pos = lastpos + 1;
+  return buf;
+}
-- 
2.27.0



[PATCH 03/57] rs6000: Initial create of rs6000-gen-builtins.c

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-gen-builtins.c: New.
---
 gcc/config/rs6000/rs6000-gen-builtins.c | 165 
 1 file changed, 165 insertions(+)
 create mode 100644 gcc/config/rs6000/rs6000-gen-builtins.c

diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c 
b/gcc/config/rs6000/rs6000-gen-builtins.c
new file mode 100644
index 000..0afbff8e3ab
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -0,0 +1,165 @@
+/* Generate built-in function initialization and recognition for Power.
+   Copyright (C) 2020-21 Free Software Foundation, Inc.
+   Contributed by Bill Schmidt, IBM 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This program generates built-in function initialization and
+   recognition code for Power targets, based on text files that
+   describe the built-in functions and vector overloads:
+
+     rs6000-builtin-new.def    Table of built-in functions
+     rs6000-overload.def       Table of overload functions
+
+   Both files group similar functions together in "stanzas," as
+   described below.
+
+   Each stanza in the built-in function file starts with a line
+   identifying the circumstances in which the group of functions is
+   permitted, with the gating predicate in square brackets.  For
+   example, this could be
+
+ [altivec]
+
+   or it could be
+
+ [power9]
+
+   The bracketed gating predicate is the only information allowed on
+   the stanza header line, other than whitespace.
+
+   Following the stanza header are two lines for each function: the
+   prototype line and the attributes line.  The prototype line has
+   this format, where the square brackets indicate optional
+   information and angle brackets indicate required information:
+
+     [kind] <return-type> <bif-name> (<argument-list>);
+
+   Here [kind] can be one of "const", "pure", or "fpmath";
+   <return-type> is a legal type for a built-in function result;
+   <bif-name> is the name by which the function can be called;
+   and <argument-list> is a comma-separated list of legal types
+   for built-in function arguments.  The argument list may be
+   empty, but the parentheses and semicolon are required.
+
+   The attributes line looks like this:
+
+     <bif-id> <bif-pattern> {<attribute-list>}
+
+   Here <bif-id> is a unique internal identifier for the built-in
+   function that will be used as part of an enumeration of all
+   built-in functions; <bif-pattern> is the define_expand or
+   define_insn that will be invoked when the call is expanded;
+   and <attribute-list> is a comma-separated list of special
+   conditions that apply to the built-in function.  The attribute
+   list may be empty, but the braces are required.
+
+   Attributes are strings, such as these:
+
+     init     Process as a vec_init function
+     set      Process as a vec_set function
+     extract  Process as a vec_extract function
+     nosoft   Not valid with -msoft-float
+     ldvec    Needs special handling for vec_ld semantics
+     stvec    Needs special handling for vec_st semantics
+     reve     Needs special handling for element reversal
+     pred     Needs special handling for comparison predicates
+     htm      Needs special handling for transactional memory
+     htmspr   HTM function using an SPR
+     htmcr    HTM function using a CR
+     mma      Needs special handling for MMA instructions
+     quad     MMA instruction using a register quad as an input operand
+     pair     MMA instruction using a register pair as an input operand
+     no32bit  Not valid for TARGET_32BIT
+     32bit    Requires different handling for TARGET_32BIT
+     cpu      This is a "cpu_is" or "cpu_supports" builtin
+     ldstmask Altivec mask for load or store
+     lxvrse   Needs special handling for load-rightmost, sign-extended
+     lxvrze   Needs special handling for load-rightmost, zero-extended
+     endian   Needs special handling for endianness
+
+   An example stanza might look like this:
+
+[altivec]
+  const vsc __builtin_altivec_abs_v16qi (vsc);
+ABS_V16QI absv16qi2 {}
+  const vss __builtin_altivec_abs_v8hi (vss);
+ABS_V8HI absv8hi2 {}
+
+   Here "vsc" and "vss" are shorthand for "vector signed char" and
+   "vector signed short" to shorten line lengths and improve readability.
+   Note the use of indentation, which is recommended but not required.
+
+   The overload file has more complex stanza headers.  Here the stanza
+   represents all functions with the same overloaded function name:
+
+ [, , [[, ]] 

[PATCH 04/57] rs6000: Add initial input files

2021-04-27 Thread Bill Schmidt via Gcc-patches
This patch adds a tiny subset of the built-in and overload descriptions.

2021-04-02  Bill Schmidt  

gcc/
* config/rs6000/rs6000-builtin-new.def: New.
* config/rs6000/rs6000-overload.def: New.
---
 gcc/config/rs6000/rs6000-builtin-new.def | 199 +++
 gcc/config/rs6000/rs6000-overload.def|  82 ++
 2 files changed, 281 insertions(+)
 create mode 100644 gcc/config/rs6000/rs6000-builtin-new.def
 create mode 100644 gcc/config/rs6000/rs6000-overload.def

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def 
b/gcc/config/rs6000/rs6000-builtin-new.def
new file mode 100644
index 000..a84a3def2d5
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -0,0 +1,199 @@
+; Built-in functions for PowerPC.
+; Copyright (C) 2020-21 Free Software Foundation, Inc.
+; Contributed by Bill Schmidt, IBM 
+;
+; This file is part of GCC.
+;
+; GCC is free software; you can redistribute it and/or modify it under
+; the terms of the GNU General Public License as published by the Free
+; Software Foundation; either version 3, or (at your option) any later
+; version.
+;
+; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+; WARRANTY; without even the implied warranty of MERCHANTABILITY or
+; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+; for more details.
+;
+; You should have received a copy of the GNU General Public License
+; along with GCC; see the file COPYING3.  If not see
+; <http://www.gnu.org/licenses/>.
+
+
+; Built-in functions in this file are organized into "stanzas", where
+; all built-ins in a given stanza are enabled together.  Each stanza
+; starts with a line identifying the circumstances in which the group of
+; functions is permitted, with the gating predicate in square brackets.
+; For example, this could be
+;
+; [altivec]
+;
+;   or it could be
+;
+; [power9]
+;
+; The bracketed gating predicate is the only information allowed on
+; the stanza header line, other than whitespace.
+;
+; Following the stanza header are two lines for each function: the
+; prototype line and the attributes line.  The prototype line has
+; this format, where the square brackets indicate optional
+; information and angle brackets indicate required information:
+;
+;   [kind] <return-type> <bif-name> (<argument-list>);
+;
+; Here [kind] can be one of "const", "pure", or "fpmath";
+; <return-type> is a legal type for a built-in function result;
+; <bif-name> is the name by which the function can be called;
+; and <argument-list> is a comma-separated list of legal types
+; for built-in function arguments.  The argument list may be
+; empty, but the parentheses and semicolon are required.
+;
+; A legal type is of the form:
+;
+;   [const] [[signed|unsigned] <basetype> | <vectype>] [*]
+;
+; where "const" applies only to a <basetype> of "int".  Legal values
+; of <basetype> are (for now):
+;
+;   char
+;   short
+;   int
+;   long
+;   long double
+;   long long
+;   float
+;   double
+;   __int128
+;   _Float128
+;   bool
+;   string
+;   _Decimal32
+;   _Decimal64
+;   _Decimal128
+;   __ibm128
+;
+; Legal values of <vectype> are as follows, and are shorthand for
+; the associated meaning:
+;
+;   vsc    vector signed char
+;   vuc    vector unsigned char
+;   vbc    vector bool char
+;   vss    vector signed short
+;   vus    vector unsigned short
+;   vbs    vector bool short
+;   vsi    vector signed int
+;   vui    vector unsigned int
+;   vbi    vector bool int
+;   vsll   vector signed long long
+;   vull   vector unsigned long long
+;   vbll   vector bool long long
+;   vsq    vector signed __int128
+;   vuq    vector unsigned __int128
+;   vbq    vector bool __int128
+;   vp     vector pixel
+;   vf     vector float
+;   vd     vector double
+;   v256   __vector_pair
+;   v512   __vector_quad
+;
+; For simplicity, we don't support "short int" and "long long int".
+; We don't currently support a <basetype> of "_Float16".  "signed"
+; and "unsigned" only apply to integral base types.  The optional *
+; indicates a pointer type.
+;
+; The attributes line looks like this:
+;
+;   <bif-id> <bif-pattern> {<attribute-list>}
+;
+; Here <bif-id> is a unique internal identifier for the built-in
+; function that will be used as part of an enumeration of all
+; built-in functions; <bif-pattern> is the define_expand or
+; define_insn that will be invoked when the call is expanded;
+; and <attribute-list> is a comma-separated list of special
+; conditions that apply to the built-in function.  The attribute
+; list may be empty, but the braces are required.
+;
+; Attributes are strings, and the allowed ones are listed below.
+;
+;   init Process as a vec_init function
+;   set  Process as a vec_set function
+;   extract  Process as a vec_extract function
+;   nosoft   Not valid with -msoft-float
+;   ldvec    Needs special handling for vec_ld semantics
+;   stvec    Needs special handling for vec_st semantics
+;   reve Needs special handling 

[PATCH 01/57] Allow targets to specify build dependencies for out_object_file

2021-04-27 Thread Bill Schmidt via Gcc-patches
2021-03-03  Bill Schmidt  

gcc/
* Makefile.in (OUT_FILE_DEPS): New variable.
(out_object_file): Depend on OUT_FILE_DEPS.
---
 gcc/Makefile.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 8a5fb3fd99c..2fd94fc7dba 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -558,6 +558,7 @@ TM_MULTILIB_CONFIG=@TM_MULTILIB_CONFIG@
 TM_MULTILIB_EXCEPTIONS_CONFIG=@TM_MULTILIB_EXCEPTIONS_CONFIG@
 out_file=$(srcdir)/config/@out_file@
 out_object_file=@out_object_file@
+OUT_FILE_DEPS=
 common_out_file=$(srcdir)/common/config/@common_out_file@
 common_out_object_file=@common_out_object_file@
 md_file=$(srcdir)/common.md $(srcdir)/config/@md_file@
@@ -2361,7 +2362,7 @@ pass-instances.def: $(srcdir)/passes.def $(PASSES_EXTRA) \
$(AWK) -f $(srcdir)/gen-pass-instances.awk \
  $(srcdir)/passes.def $(PASSES_EXTRA) > pass-instances.def
 
-$(out_object_file): $(out_file)
+$(out_object_file): $(out_file) $(OUT_FILE_DEPS)
$(COMPILE) $<
$(POSTCOMPILE)
 
-- 
2.27.0



[PATCH 00/57] Replace the Power target-specific built-in machinery

2021-04-27 Thread Bill Schmidt via Gcc-patches
The design of the target-specific built-in function support in the
Power back end has not stood the test of time.  The machinery is
grossly inefficient, confusing, and arcane; and adding new built-in
functions is inefficient and error-prone.  This patch set introduces a
replacement. 

Because of the scope of the changes, it's important to be able to
verify that the new system makes only intended changes to the
functions that are supported.  Therefore this patch set adds a new
mechanism, and (in the final patch) enables it instead of the existing
support, but does not yet remove the old support.  That will happen in
a follow-up patch once we're comfortable with the new system.

Most of the patches in this set are specific to the rs6000 back end.
However, the first two patches make changes in common code and require
review from the appropriate maintainers.  Jakub and Jeff, I would
appreciate it if you could look at these two small patches.

After these changes are upstream, adding new built-in functions will
usually be as simple as adding two lines to a file,
rs6000-builtin-new.def, that give the prototype of the function and a
little additional information.  Adding new overloaded functions will
require adding a new section to another file, rs6000-overload.def,
with one line describing the overload information, and two lines for
each function to be dispatched to from the overloaded function.

The patches are divided into the following sections.

Patches 0001-0002: Common code patches

  Patch 0001 adds a mechanism to the Makefile to allow specifying
  additional dependencies for "out_object_file", which is rs6000.o for
  the rs6000 back end.  I found this necessary to be able to have
  rs6000.o depend on a header file generated during the build.

  Patch 0002 expands the gengtype machinery to scan header files
  created during the build for GC roots.

Patches 0003, 0005-0023: Generator program

  A new program, rs6000-gen-builtins, is created and executed during
  the build.  It reads rs6000-builtin-new.def and rs6000-overload.def
  and produces three output files:  rs6000-builtins.h,
  rs6000-builtins.c, and rs6000-vecdefines.h.  rs6000-builtins.h
  defines the data structures representing the built-in functions,
  overloaded functions, overload instantiations, and function type
  specifiers.  rs6000-builtins.c contains static initializers for the
  data structures, as well as the function rs6000_autoinit_builtins
  that performs additional run-time initialization.
  rs6000-vecdefines.h contains a set of #defines that map external
  identifiers such as vec_add to their internal builtin names, such as
  __builtin_vec_add.  This replaces most of the similar #defines
  previously contained in altivec.h, which now #includes the new file
  instead.

  This set of patches adds the source for the generator program.

Patches 0024-0025: Target build machinery

  These patches make changes to config.gcc and t-rs6000 to build and
  run the new generator program, and to ensure that the garbage
  collection roots in rs6000-builtins.h are scanned by gengtype.

Patches 0004, 0026-0031, 0033-0037: Input files

  These patches build up the input files to the generator program,
  listing all of the built-in functions and overloads to be
  processed.

Patch 0032: Add pointer types

  This patch creates and caches a bunch of pointer type nodes.  The
  existing built-in machinery, for some reason, only created base
  types up front and created the pointer types on demand (over and
  over and over again).  The new mechanism needs all the type nodes
  available, so we add them here.

Patch 0038: Call rs6000_autoinit_builtins

Patch 0039: A little special handling for Darwin

Patches 0040-0041: Miscellaneous support patches

Patch 0042: Rewrite the overload processing

  Most of this code remains largely the same as before, with the same
  special handling for a few interesting built-in functions.  But the
  general handling of overloaded functions is now much more efficient
  since the new data structures are designed for quick lookup, whereas
  the old machinery does a brutal linear search.

Patch 0043: Rewrite gimple folding

  The "rewrite" here consists entirely of changing the names of the
  builtins to be processed, since we need a separate enumeration of
  builtins for the new machinery.

Patch 0044: Vectorization support

  Small updates to the functions used for mapping built-ins to their
  vectorized counterparts.

Patches 0045-0050: Rewrite built-in function expansion

  This is where most of the meat comes in.  Lookup of built-ins at
  expand time is again much more efficient, replacing the old
  mechanism of multiple linear searches over the whole built-in
  table.  Another major change is that all built-in functions are
  always defined, but a test at expand time is used to determine
  whether they are enabled.  This allows proper handling of
  built-ins in the presence of "#pragma target" directives.  Also,
  

[PATCH 02/57] Support scanning of build-time GC roots in gengtype

2021-04-27 Thread Bill Schmidt via Gcc-patches
Currently gengtype supports scanning target-specific files for GC roots,
but those files must exist in the source tree.  This patch extends the
support to include header files generated into the build directory.  It
also allows targets to specify build dependencies for s-gtype to ensure
the built headers are up to date prior to running gengtype.

2021-04-02  Bill Schmidt  

gcc/
* Makefile.in (EXTRA_GTYPE_DEPS): New variable.
(s-gtype): Depend on EXTRA_GTYPE_DEPS.
* gengtype-state.c (state_writer::write_state_files_list): Add a
parameter to the fileslist expression for the number of build
headers to scan.
(read_state_file_list): Detect build headers and strip the initial
"./" from their names.
* gengtype.c (build_headers): New global variable.
(num_build_headers): Likewise.
(open_base_files): Emit #include for each build header.
(main): Detect and count build headers.
* gengtype.h (build_headers): New extern variable.
(num_build_headers): Likewise.
---
 gcc/Makefile.in  |  5 +++--
 gcc/gengtype-state.c | 29 +++--
 gcc/gengtype.c   | 19 ---
 gcc/gengtype.h   |  5 +
 4 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 2fd94fc7dba..1a253256042 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -561,6 +561,7 @@ out_object_file=@out_object_file@
 OUT_FILE_DEPS=
 common_out_file=$(srcdir)/common/config/@common_out_file@
 common_out_object_file=@common_out_object_file@
+EXTRA_GTYPE_DEPS=
 md_file=$(srcdir)/common.md $(srcdir)/config/@md_file@
 tm_file_list=@tm_file_list@
 tm_include_list=@tm_include_list@
@@ -2740,8 +2741,8 @@ s-gtyp-input: Makefile
$(SHELL) $(srcdir)/../move-if-change tmp-gi.list gtyp-input.list
$(STAMP) s-gtyp-input
 
-s-gtype: build/gengtype$(build_exeext) $(filter-out [%], $(GTFILES)) \
-gtyp-input.list
+s-gtype: $(EXTRA_GTYPE_DEPS) build/gengtype$(build_exeext) \
+   $(filter-out [%], $(GTFILES)) gtyp-input.list
 # First, parse all files and save a state file.
$(RUN_GEN) build/gengtype$(build_exeext) $(GENGTYPE_FLAGS) \
 -S $(srcdir) -I gtyp-input.list -w tmp-gtype.state
diff --git a/gcc/gengtype-state.c b/gcc/gengtype-state.c
index 891f2e18a61..be3549dce33 100644
--- a/gcc/gengtype-state.c
+++ b/gcc/gengtype-state.c
@@ -1269,7 +1269,7 @@ state_writer::write_state_files_list (void)
   int i = 0;
   /* Write the list of files with their lang_bitmap.  */
   begin_s_expr ("fileslist");
-  fprintf (state_file, "%d", (int) num_gt_files);
+  fprintf (state_file, "%d %d", (int) num_gt_files, (int) num_build_headers);
   for (i = 0; i < (int) num_gt_files; i++)
 {
   const char *cursrcrelpath = NULL;
@@ -2456,16 +2456,20 @@ read_state_files_list (void)
   struct state_token_st *t0 = peek_state_token (0);
   struct state_token_st *t1 = peek_state_token (1);
   struct state_token_st *t2 = peek_state_token (2);
+  struct state_token_st *t3 = peek_state_token (3);
 
   if (state_token_kind (t0) == STOK_LEFTPAR
   && state_token_is_name (t1, "!fileslist")
-  && state_token_kind (t2) == STOK_INTEGER)
+  && state_token_kind (t2) == STOK_INTEGER
+  && state_token_kind (t3) == STOK_INTEGER)
 {
-  int i = 0;
+  int i = 0, j = 0;
   num_gt_files = t2->stok_un.stok_num;
-  next_state_tokens (3);
-  t0 = t1 = t2 = NULL;
+  num_build_headers = t3->stok_un.stok_num;
+  next_state_tokens (4);
+  t0 = t1 = t2 = t3 = NULL;
   gt_files = XCNEWVEC (const input_file *, num_gt_files);
+  build_headers = XCNEWVEC (const char *, num_build_headers);
   for (i = 0; i < (int) num_gt_files; i++)
{
  bool issrcfile = FALSE;
@@ -2498,7 +2502,20 @@ read_state_files_list (void)
  free (fullpath);
}
  else
-   curgt = input_file_by_name (fnam);
+   {
+ curgt = input_file_by_name (fnam);
+ /* Look for a header file created during the build,
+which looks like "./<filename>.h".  */
+ int len = strlen (fnam);
+ if (len >= 5 && fnam[0] == '.' && fnam[1] == '/'
+ && fnam[len-2] == '.' && fnam[len-1] == 'h')
+   {
+ char *buf = (char *) xmalloc (len - 1);
+ /* Strip the leading "./" from the filename.  */
+ strcpy (buf, &fnam[2]);
+ build_headers[j++] = buf;
+   }
+   }
  set_lang_bitmap (curgt, bmap);
  gt_files[i] = curgt;
  next_state_tokens (2);
diff --git a/gcc/gengtype.c b/gcc/gengtype.c
index 98d4626f87e..57dc6e9fbe8 100644
--- a/gcc/gengtype.c
+++ b/gcc/gengtype.c
@@ -143,6 +143,11 

Re: [PATCH,rs6000] Test cases for p10 fusion patterns

2021-04-26 Thread will schmidt via Gcc-patches
On Mon, 2021-04-26 at 14:00 -0500, acsaw...@linux.ibm.com wrote:
> From: Aaron Sawdey 
> 
> This adds some test cases to make sure that the combine patterns for p10
> fusion are working.
> 
> OK for trunk?
> 
> gcc/testsuite/ChangeLog:
>   * gcc.target/powerpc/fusion-p10-ldcmpi.c: New file.
>   * gcc.target/powerpc/fusion-p10-2logical.c: New file.
> ---
>  .../gcc.target/powerpc/fusion-p10-2logical.c  | 205 ++
>  .../gcc.target/powerpc/fusion-p10-ldcmpi.c|  66 ++
>  2 files changed, 271 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c

ok


> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c 
> b/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c
> new file mode 100644
> index 000..9a205373505
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/fusion-p10-2logical.c
> @@ -0,0 +1,205 @@
> +/* { dg-do compile { target { powerpc*-*-* } } } */
> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
> +/* { dg-options "-mdejagnu-cpu=power10 -O3 -dp" } */
> +
> +#include <altivec.h>
> +#include <stdint.h>
> +
> +/* and/andc/eqv/nand/nor/or/orc/xor */
> +#define AND(a,b) ((a)&(b))
> +#define ANDC1(a,b) ((a)&((~b)))
> +#define ANDC2(a,b) ((~(a))&(b))
> +#define EQV(a,b) (~((a)^(b)))
> +#define NAND(a,b) (~((a)&(b)))
> +#define NOR(a,b) (~((a)|(b)))
> +#define OR(a,b) ((a)|(b))
> +#define ORC1(a,b) ((a)|((~b)))
> +#define ORC2(a,b) ((~(a))|(b))
> +#define XOR(a,b) ((a)^(b))
> +#define TEST1(type, func)
> \
> +  type func ## _and_T_ ## type (type a, type b, type c) { return 
> AND(func(a,b),c); } \
> +  type func ## _andc1_T_   ## type (type a, type b, type c) { return 
> ANDC1(func(a,b),c); } \
> +  type func ## _andc2_T_   ## type (type a, type b, type c) { return 
> ANDC2(func(a,b),c); } \
> +  type func ## _eqv_T_ ## type (type a, type b, type c) { return 
> EQV(func(a,b),c); } \
> +  type func ## _nand_T_## type (type a, type b, type c) { return 
> NAND(func(a,b),c); } \
> +  type func ## _nor_T_ ## type (type a, type b, type c) { return 
> NOR(func(a,b),c); } \
> +  type func ## _or_T_  ## type (type a, type b, type c) { return 
> OR(func(a,b),c); } \
> +  type func ## _orc1_T_## type (type a, type b, type c) { return 
> ORC1(func(a,b),c); } \
> +  type func ## _orc2_T_## type (type a, type b, type c) { return 
> ORC2(func(a,b),c); } \
> +  type func ## _xor_T_ ## type (type a, type b, type c) { return 
> XOR(func(a,b),c); } \
> +  type func ## _rev_and_T_ ## type (type a, type b, type c) { return 
> AND(c,func(a,b)); } \
> +  type func ## _rev_andc1_T_   ## type (type a, type b, type c) { return 
> ANDC1(c,func(a,b)); } \
> +  type func ## _rev_andc2_T_   ## type (type a, type b, type c) { return 
> ANDC2(c,func(a,b)); } \
> +  type func ## _rev_eqv_T_ ## type (type a, type b, type c) { return 
> EQV(c,func(a,b)); } \
> +  type func ## _rev_nand_T_## type (type a, type b, type c) { return 
> NAND(c,func(a,b)); } \
> +  type func ## _rev_nor_T_ ## type (type a, type b, type c) { return 
> NOR(c,func(a,b)); } \
> +  type func ## _rev_or_T_  ## type (type a, type b, type c) { return 
> OR(c,func(a,b)); } \
> +  type func ## _rev_orc1_T_## type (type a, type b, type c) { return 
> ORC1(c,func(a,b)); } \
> +  type func ## _rev_orc2_T_## type (type a, type b, type c) { return 
> ORC2(c,func(a,b)); } \
> +  type func ## _rev_xor_T_ ## type (type a, type b, type c) { return 
> XOR(c,func(a,b)); }
> +#define TEST(type)\
> +  TEST1(type,AND) \
> +  TEST1(type,ANDC1)   \
> +  TEST1(type,ANDC2)   \
> +  TEST1(type,EQV) \
> +  TEST1(type,NAND)\
> +  TEST1(type,NOR) \
> +  TEST1(type,OR)  \
> +  TEST1(type,ORC1)\
> +  TEST1(type,ORC2)\
> +  TEST1(type,XOR)
> +
> +typedef vector bool char vboolchar_t;
> +typedef vector unsigned int vuint_t;
> +
> +TEST(uint8_t);
> +TEST(int8_t);
> +TEST(uint16_t);
> +TEST(int16_t);
> +TEST(uint32_t);
> +TEST(int32_t);
> +TEST(uint64_t);
> +TEST(int64_t);
> +TEST(vboolchar_t);
> +TEST(vuint_t);

ok

> +
> +/* Recreate with:
> +   grep ' \*fuse_' fusion-p10-2logical.s|sed -e 's,^.*\*,,' |sort -k 7,7 
> |uniq -c|awk '{l=30-length($2); printf("/%s* { %s { scan-assembler-times 
> \"%s\"%-*s%4d } } *%s/\n","","dg-final",$2,l,"",$1,"");}'
> + */

nice, ok.


> +  
> +/* { dg-final { scan-assembler-times "fuse_and_and/1"
>   16 } } */
> +/* { dg-final { scan-assembler-times "fuse_and_and/2"
>   16 } } */
> +/* { dg-final { scan-assembler-times "fuse_andc_and/0"   
>   16 } } */
> +/* { dg-final { scan-assembler-times "fuse_andc_and/1"   
>   26 } } */





> diff --git a/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c 
> b/gcc/testsuite/gcc.target/powerpc/fusion-p10-ldcmpi.c
> new file mode 

Re: [PATCH,rs6000] Add insn types for fusion pairs

2021-04-26 Thread will schmidt via Gcc-patches
On Mon, 2021-04-26 at 13:04 -0500, acsaw...@linux.ibm.com wrote:
> From: Aaron Sawdey 
> 
> This adds new values for insn attr type for p10 fusion. The
> genfusion.pl
> script is modified to use them, and fusion.md regenerated to capture
> the new patterns. There are also some formatting only changes to
> fusion.md that apparently weren't captured after a previous commit
> of genfusion.pl.
> 
> If bootstrap/regtest passes, OK for trunk and backport to 11.2?
> 
> Thanks,
> Aaron
> 
Hi,


> gcc/
>   * rs6000.md (define_attr "type"): Add types for fusion.
>   * genfusion.md (gen_ld_cmpi_p10): Use new fusion types.
>   (gen_2logical): Use new fusion types.
>   * fusion.md: Regenerate.

Should the new types be listed here?


> ---
>  gcc/config/rs6000/fusion.md| 288 ---
> --
>  gcc/config/rs6000/genfusion.pl |   8 +-
>  gcc/config/rs6000/rs6000.md|   4 +-
>  3 files changed, 152 insertions(+), 148 deletions(-)
> 


> diff --git a/gcc/config/rs6000/rs6000.md
> b/gcc/config/rs6000/rs6000.md
> index c8cdc42533c..801e1014267 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -204,7 +204,9 @@ (define_attr "type"
> vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,
> vecfloat,vecfdiv,vecdouble,mtvsr,mfvsr,crypto,
> veclogical,veccmpfx,vecexts,vecmove,
> -   htm,htmsimple,dfp,mma"
> +   htm,htmsimple,dfp,mma,
> +   fused_arith_logical,fused_cmp_isel,fused_carry,fused_load_cmpi,
> +   fused_load_load,fused_store_store,fused_addis_load,fused_mtbc,fus
> ed_vector"


A few of these new attributes appear unused in this patch.  
Should those be dropped, or will they be used later? 


> diff --git a/gcc/config/rs6000/fusion.md
> b/gcc/config/rs6000/fusion.md
> index 56478fcae1d..6d71bc2df73 100644
> --- a/gcc/config/rs6000/fusion.md
> +++ b/gcc/config/rs6000/fusion.md



> @@ -1503,7 +1503,7 @@ (define_insn "*fuse_xor_xor"
> xor %3,%1,%0\;xor %3,%3,%2
> xor %3,%1,%0\;xor %3,%3,%2
> xor %4,%1,%0\;xor %3,%4,%2"
> -  [(set_attr "type" "logical")
> +  [(set_attr "type" "fused_arith_logical")
> (set_attr "cost" "6")
> (set_attr "length" "8")])
> 
> @@ -1521,7 +1521,7 @@ (define_insn "*fuse_vand_vand"
> vand %3,%1,%0\;vand %3,%3,%2
> vand %3,%1,%0\;vand %3,%3,%2
> vand %4,%1,%0\;vand %3,%4,%2"
> -  [(set_attr "type" "logical")
> +  [(set_attr "type" "fused_vector")
> (set_attr "cost" "6")
> (set_attr "length" "8")])


Noting that the vector-logicals are lumped into the fused_vector versus
logical..   I assume this is correct.  :-)




diff --git a/gcc/config/rs6000/genfusion.pl
> b/gcc/config/rs6000/genfusion.pl
> index c86c7436a62..ce48fd94f95 100755
> --- a/gcc/config/rs6000/genfusion.pl
> +++ b/gcc/config/rs6000/genfusion.pl
> @@ -135,7 +135,7 @@ sub gen_ld_cmpi_p10
> print "   (set (match_dup 2)\n";
> print "(compare:${ccmode} (match_dup 0) (match_dup
> 3)))]\n";
> print "  \"\"\n";
> -   print "  [(set_attr \"type\" \"load\")\n";
> +   print "  [(set_attr \"type\" \"fused_load_cmpi\")\n";
> print "   (set_attr \"cost\" \"8\")\n";
> print "   (set_attr \"length\" \"8\")])\n";
> print "\n";
> @@ -159,18 +159,20 @@ sub gen_2logical
>  my ($kind, $vchr, $mode, $pred, $constraint, $cr, $outer,
> $outer_op,
>   $outer_comp, $outer_inv, $outer_rtl, $inner, $inner_comp,
> $inner_inv,
>   $inner_rtl, $inner_op, $both_commute, $c4, $bc, $inner_arg0,
> - $inner_arg1, $inner_exp, $outer_arg2, $outer_exp, $insn);
> + $inner_arg1, $inner_exp, $outer_arg2, $outer_exp, $insn,
> $fuse_type);
>KIND: foreach $kind ('scalar','vector') {
>if ( $kind eq 'vector' ) {
> $vchr = "v";
> $mode = "VM";
> $pred = "altivec_register_operand";
> $constraint = "v";
> +   $fuse_type = "fused_vector";
>} else {
> $vchr = "";
> $mode = "GPR";
> $pred = "gpc_reg_operand";
> $constraint = "r";
> +   $fuse_type = "fused_arith_logical";
>}
>$c4 =
> "${constraint},${constraint},${constraint},${constraint}";
>  OUTER: foreach $outer ( @logicals ) {
> @@ -227,7 +229,7 @@ sub gen_2logical
> ${inner_op} %3,%1,%0\\;${outer_op} %3,%3,%2
> ${inner_op} %3,%1,%0\\;${outer_op} %3,%3,%2
> ${inner_op} %4,%1,%0\\;${outer_op} %3,%4,%2"
> -  [(set_attr "type" "logical")
> +  [(set_attr "type" "$fuse_type")
> (set_attr "cost" "6")
> (set_attr "length" "8")])


ok

thanks
-Will

>  EOF
> 



Re: [PATCH 4/4] rs6000: Add ROP tests

2021-04-26 Thread Bill Schmidt via Gcc-patches

On 4/26/21 11:04 AM, will schmidt wrote:

On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:

2021-03-25  Bill Schmidt  

gcc/testsuite/
* gcc.target/powerpc/rop-1.c: New.
* gcc.target/powerpc/rop-2.c: New.
* gcc.target/powerpc/rop-3.c: New.
* gcc.target/powerpc/rop-4.c: New.
* gcc.target/powerpc/rop-5.c: New.

ok


---
  gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 
  gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 
  gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 +++
  gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 ++
  gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
  5 files changed, 82 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c

diff --git a/gcc/testsuite/gcc.target/powerpc/rop-1.c 
b/gcc/testsuite/gcc.target/powerpc/rop-1.c
new file mode 100644
index 000..cf8e2b01dda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that ROP-protect instructions are inserted when a
+   call is present.  */
+
+extern void foo (void);
+
+int bar ()
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler {\mhashst\M} } } */
+/* { dg-final { scan-assembler {\mhashchk\M} } } */

ok



diff --git a/gcc/testsuite/gcc.target/powerpc/rop-2.c 
b/gcc/testsuite/gcc.target/powerpc/rop-2.c
new file mode 100644
index 000..dde403b0ef5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -mprivileged" } */
+
+/* Verify that privileged ROP-protect instructions are inserted when a
+   call is present.  */
+
+extern void foo (void);
+
+int bar ()
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler {\mhashstp\M} } } */
+/* { dg-final { scan-assembler {\mhashchkp\M} } } */

ok



diff --git a/gcc/testsuite/gcc.target/powerpc/rop-3.c 
b/gcc/testsuite/gcc.target/powerpc/rop-3.c
new file mode 100644
index 000..054f94fda99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-3.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that ROP-protect instructions execute correctly when a
+   call is present.  */
+
+void __attribute__((noinline)) foo ()
+{
+  asm ("");
+}
+
+int main ()
+{
+  foo ();
+  return 0;
+}
+

ok



diff --git a/gcc/testsuite/gcc.target/powerpc/rop-4.c 
b/gcc/testsuite/gcc.target/powerpc/rop-4.c
new file mode 100644
index 000..e2be8b2c035
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that no ROP-protect instructions are inserted when no
+   call is present.  */
+
+
+int bar ()
+{
+  return 5;
+}
+
+/* { dg-final { scan-assembler-not {\mhashst\M} } } */
+/* { dg-final { scan-assembler-not {\mhashchk\M} } } */


ok


diff --git a/gcc/testsuite/gcc.target/powerpc/rop-5.c 
b/gcc/testsuite/gcc.target/powerpc/rop-5.c
new file mode 100644
index 000..b759fa59979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-5.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that __ROP_PROTECT__ is predefined for -mrop-protect.  */
+
+extern void abort (void);
+
+int main ()
+{
+#ifndef __ROP_PROTECT__
+  abort ();
+#endif
+  return 0;
+}
+

ok.


Does there need to be another test to verify if -mrop-protect is on by
default without specifying -mrop-protect?   (or is it?)  Question on
0/4.


It's off by default (see the Init(0) in patch 1/4).

Bill


with that noted,
lgtm,
thanks,
-will





Re: [PATCH 0/4] [rs6000] ROP support

2021-04-26 Thread Bill Schmidt via Gcc-patches

On 4/26/21 11:01 AM, will schmidt wrote:

On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:

Add POWER10 support for hashst[p] and hashchk[p] operations.  When
the -mrop-protect option is selected, any function that loads the
link
register from memory before returning must have protection in the
prologue and epilogue to ensure the link register save location has
not been compromised.  If -mprivileged is also specified, the
protection instructions generated require supervisor privilege.

Hi,

Is -mprivileged tied directly to ROP, or is it a 'generic' option?

As
is, it looks like it can be considered generic, so could be also used
for other cases where we would want to generate instructions that
require supervisor privilege.


Yes, this is deliberately designed to be orthogonal from the specific 
ROP support.  That is, ROP is the first use, but other future uses are 
anticipated.


Bill



Additional comments on the subsequent patches..
thanks
-Will


The patches are broken up into logical chunks:
  - Option handling
  - Instruction generation
  - Predefined macro handling
  - Test cases

Bootstrapped and tested on a POWER10 system with no regressions.
Tests on a kernel that enables user-space ROP mitigation were
successful.  Is this series ok for trunk?  I would also like to later
backport these patches to GCC for the 11.2 release.

Thanks!
Bill

Bill Schmidt (4):
   rs6000: Add -mrop-protect and -mprivileged flags
   rs6000: Emit ROP-protect instructions in prologue and epilogue
   rs6000: Conditionally define __ROP_PROTECT__
   rs6000: Add ROP tests

  gcc/config/rs6000/rs6000-c.c |  3 +
  gcc/config/rs6000/rs6000-internal.h  |  2 +
  gcc/config/rs6000/rs6000-logue.c | 86 +-
--
  gcc/config/rs6000/rs6000.c   |  7 ++
  gcc/config/rs6000/rs6000.md  | 39 +++
  gcc/config/rs6000/rs6000.opt |  6 ++
  gcc/doc/invoke.texi  | 19 +-
  gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 +
  gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 +
  gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 ++
  gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 
  gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
  12 files changed, 231 insertions(+), 13 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c



Re: [PATCH 4/4] rs6000: Add ROP tests

2021-04-26 Thread will schmidt via Gcc-patches
On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:
> 2021-03-25  Bill Schmidt  
> 
> gcc/testsuite/
>   * gcc.target/powerpc/rop-1.c: New.
>   * gcc.target/powerpc/rop-2.c: New.
>   * gcc.target/powerpc/rop-3.c: New.
>   * gcc.target/powerpc/rop-4.c: New.
>   * gcc.target/powerpc/rop-5.c: New.

ok

> ---
>  gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 
>  gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 
>  gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 +++
>  gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 ++
>  gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
>  5 files changed, 82 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/rop-1.c 
> b/gcc/testsuite/gcc.target/powerpc/rop-1.c
> new file mode 100644
> index 000..cf8e2b01dda
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rop-1.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
> +
> +/* Verify that ROP-protect instructions are inserted when a
> +   call is present.  */
> +
> +extern void foo (void);
> +
> +int bar ()
> +{
> +  foo ();
> +  return 5;
> +}
> +
> +/* { dg-final { scan-assembler {\mhashst\M} } } */
> +/* { dg-final { scan-assembler {\mhashchk\M} } } */

ok


> diff --git a/gcc/testsuite/gcc.target/powerpc/rop-2.c 
> b/gcc/testsuite/gcc.target/powerpc/rop-2.c
> new file mode 100644
> index 000..dde403b0ef5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rop-2.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -mprivileged" } */
> +
> +/* Verify that privileged ROP-protect instructions are inserted when a
> +   call is present.  */
> +
> +extern void foo (void);
> +
> +int bar ()
> +{
> +  foo ();
> +  return 5;
> +}
> +
> +/* { dg-final { scan-assembler {\mhashstp\M} } } */
> +/* { dg-final { scan-assembler {\mhashchkp\M} } } */

ok


> diff --git a/gcc/testsuite/gcc.target/powerpc/rop-3.c 
> b/gcc/testsuite/gcc.target/powerpc/rop-3.c
> new file mode 100644
> index 000..054f94fda99
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rop-3.c
> @@ -0,0 +1,19 @@
> +/* { dg-do run { target { power10_hw } } } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
> +
> +/* Verify that ROP-protect instructions execute correctly when a
> +   call is present.  */
> +
> +void __attribute__((noinline)) foo ()
> +{
> +  asm ("");
> +}
> +
> +int main ()
> +{
> +  foo ();
> +  return 0;
> +}
> +

ok


> diff --git a/gcc/testsuite/gcc.target/powerpc/rop-4.c 
> b/gcc/testsuite/gcc.target/powerpc/rop-4.c
> new file mode 100644
> index 000..e2be8b2c035
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rop-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
> +
> +/* Verify that no ROP-protect instructions are inserted when no
> +   call is present.  */
> +
> +
> +int bar ()
> +{
> +  return 5;
> +}
> +
> +/* { dg-final { scan-assembler-not {\mhashst\M} } } */
> +/* { dg-final { scan-assembler-not {\mhashchk\M} } } */


ok

> diff --git a/gcc/testsuite/gcc.target/powerpc/rop-5.c 
> b/gcc/testsuite/gcc.target/powerpc/rop-5.c
> new file mode 100644
> index 000..b759fa59979
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/rop-5.c
> @@ -0,0 +1,17 @@
> +/* { dg-do run { target { power10_hw } } } */
> +/* { dg-require-effective-target power10_ok } */
> +/* { dg-require-effective-target powerpc_elfv2 } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
> +
> +/* Verify that __ROP_PROTECT__ is predefined for -mrop-protect.  */
> +
> +extern void abort (void);
> +
> +int main ()
> +{
> +#ifndef __ROP_PROTECT__
> +  abort ();
> +#endif
> +  return 0;
> +}
> +

ok.


Does there need to be another test to verify if -mrop-protect is on by
default without specifying -mrop-protect?   (or is it?)  Question on
0/4.

with that noted,
lgtm, 
thanks,
-will





Re: [PATCH 3/4] rs6000: Conditionally define __ROP_PROTECT__

2021-04-26 Thread will schmidt via Gcc-patches
On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:
> 2021-03-25  Bill Schmidt  
> 
> gcc/
>   * config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
>   __ROP_PROTECT__ if -mrop-protect is selected.


ok

> ---
>  gcc/config/rs6000/rs6000-c.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index 0f8a629ff5a..afcb5bb6e39 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -602,6 +602,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
> flags,
>/* Whether pc-relative code is being generated.  */
>if ((flags & OPTION_MASK_PCREL) != 0)
>  rs6000_define_or_undefine_macro (define_p, "__PCREL__");
> +  /* Tell the user -mrop-protect is in play.  */
> +  if (rs6000_rop_protect)
> +rs6000_define_or_undefine_macro (define_p, "__ROP_PROTECT__");
> 

I notice that almost all of the other defines are controlled by an
"if (flags & OPTION)" logic block, but this seems OK.

lgtm, 
thanks,
-WIll


>  }
> 
>  void



Re: [PATCH 2/4] rs6000: Emit ROP-protect instructions in prologue and epilogue

2021-04-26 Thread will schmidt via Gcc-patches
On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:
> Insert the hashst and hashchk instructions when -mrop-protect has been
> selected.  The encrypted save slot for ROP mitigation is placed
> between the parameter save area and the alloca space (if any;
> otherwise the local variable space).
> 
> Note that ROP-mitigation instructions are currently only provided for
> the ELFv2 ABI.
> 
> 2021-03-25  Bill Schmidt  
> 
> gcc/
>   * config/rs6000/rs6000-internal.h (rs6000_stack): Add
>   rop_check_save_offset and rop_check_size.
>   * config/rs6000/rs6000-logue.c (rs6000_stack_info): Compute
>   rop_check_size and rop_check_save_offset.
>   (debug_stack_info): Dump rop_check_save_offset and rop_check_size.
>   (rs6000_emit_prologue): Assert if WORLD_SAVE used with
>   -mrop-protect; emit hashst[p] in prologue; emit hashchk[p] in
>   epilogue.
>   * config/rs6000/rs6000.md (unspec): Add UNSPEC_HASHST[P] and
>   UNSPEC_HASHCHK[P].
>   (hashst): New define_insn.
>   (hashstp): Likewise.
>   (hashchk): Likewise.
>   (hashchkp): Likewise.

ok

> ---
>  gcc/config/rs6000/rs6000-internal.h |  2 +
>  gcc/config/rs6000/rs6000-logue.c| 86 +
>  gcc/config/rs6000/rs6000.md | 39 +
>  3 files changed, 116 insertions(+), 11 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-internal.h 
> b/gcc/config/rs6000/rs6000-internal.h
> index 428a7861a98..8fc77ba6138 100644
> --- a/gcc/config/rs6000/rs6000-internal.h
> +++ b/gcc/config/rs6000/rs6000-internal.h
> @@ -39,6 +39,7 @@ typedef struct rs6000_stack {
>int gp_save_offset;/* offset to save GP regs from initial 
> SP */
>int fp_save_offset;/* offset to save FP regs from initial 
> SP */
>int altivec_save_offset;   /* offset to save AltiVec regs from initial SP 
> */
> +  int rop_check_save_offset; /* offset to save ROP check from initial SP */
>int lr_save_offset;/* offset to save LR from initial SP */
>int cr_save_offset;/* offset to save CR from initial SP */
>int vrsave_save_offset;/* offset to save VRSAVE from initial SP */
> @@ -53,6 +54,7 @@ typedef struct rs6000_stack {
>int gp_size;   /* size of saved GP registers */
>int fp_size;   /* size of saved FP registers */
>int altivec_size;  /* size of saved AltiVec registers */
> +  int rop_check_size;/* size of ROP check slot */
>int cr_size;   /* size to hold CR if not in fixed area 
> */
>int vrsave_size;   /* size to hold VRSAVE */
>int altivec_padding_size;  /* size of altivec alignment padding */

ok

> diff --git a/gcc/config/rs6000/rs6000-logue.c 
> b/gcc/config/rs6000/rs6000-logue.c
> index b0ac183ceff..10cf7a2de93 100644
> --- a/gcc/config/rs6000/rs6000-logue.c
> +++ b/gcc/config/rs6000/rs6000-logue.c
> @@ -595,19 +595,21 @@ rs6000_savres_strategy (rs6000_stack_t *info,
>   +---+
>   | Parameter save area (+padding*) (P)   |  32
>   +---+
> - | Alloca space (A)  |  32+P
> + | Optional ROP check slot (R)   |  32+P
>   +---+
> - | Local variable space (L)  |  32+P+A
> + | Alloca space (A)  |  32+P+R
>   +---+
> - | Save area for AltiVec registers (W)   |  32+P+A+L
> + | Local variable space (L)  |  32+P+R+A
>   +---+
> - | AltiVec alignment padding (Y) |  32+P+A+L+W
> + | Save area for AltiVec registers (W)   |  32+P+R+A+L
>   +---+
> - | Save area for GP registers (G)|  32+P+A+L+W+Y
> + | AltiVec alignment padding (Y) |  32+P+R+A+L+W
>   +---+
> - | Save area for FP registers (F)|  32+P+A+L+W+Y+G
> + | Save area for GP registers (G)|  32+P+R+A+L+W+Y
>   +---+
> - old SP->| back chain to caller's caller |  32+P+A+L+W+Y+G+F
> + | Save area for FP registers (F)|  32+P+R+A+L+W+Y+G
> + +---+
> + old SP->| back chain to caller's caller |  32+P+R+A+L+W+Y+G+F
>   +-

Re: [PATCH 1/4] rs6000: Add -mrop-protect and -mprivileged flags

2021-04-26 Thread will schmidt via Gcc-patches
On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:
> 2021-03-25  Bill Schmidt  
> 
> gcc/
>   * config/rs6000/rs6000.c (rs6000_option_override_internal):
>   Disable shrink wrap when inserting ROP-protect instructions.
>   * config/rs6000/rs6000.opt (mrop-protect): New option.
>   (mprivileged): Likewise.
>   * doc/invoke.texi: Document mrop-protect and mprivileged.

Hi, 


> ---
>  gcc/config/rs6000/rs6000.c   |  7 +++
>  gcc/config/rs6000/rs6000.opt |  6 ++
>  gcc/doc/invoke.texi  | 19 +--
>  3 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 844fee88cf3..d13ed6e7ff4 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4036,6 +4036,13 @@ rs6000_option_override_internal (bool global_init_p)
>&& ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
>  rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
> 
> +  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
> +  if (rs6000_rop_protect)
> +{
> +  flag_shrink_wrap = 0;
> +  flag_shrink_wrap_separate = 0;
> +}

Does this (shrink-wrap is disabled if/when ROP-protect is enabled) need
additional commentary somewhere?  


> +
>/* If we can shrink-wrap the TOC register save separately, then use
>   -msave-toc-indirect unless explicitly disabled.  */
>if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
> diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
> index 0dbdf753673..d116fd12f7e 100644
> --- a/gcc/config/rs6000/rs6000.opt
> +++ b/gcc/config/rs6000/rs6000.opt
> @@ -619,3 +619,9 @@ Generate (do not generate) MMA instructions.
> 
>  mrelative-jumptables
>  Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
> +
> +mrop-protect
> +Target Var(rs6000_rop_protect) Init(0)
> +
> +mprivileged
> +Target Var(rs6000_privileged) Init(0)

Most but not all of the entries in rs6000.opt have an additional
description line.  I'd wonder about updating this to be something like:

> +mrop-protect
> +Target Var(rs6000_rop_protect) Init(0)

Enable ROP protection 

> +
> +mprivileged
> +Target Var(rs6000_privileged) Init(0)

Enable privileged instructions for ROP protection.


OK with me either way.  :-)




> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e98b0962b9f..36bd0bf9b3b 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1238,7 +1238,8 @@ See RS/6000 and PowerPC Options.
>  -mgnu-attribute  -mno-gnu-attribute @gol
>  -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} 
> @gol
>  -mstack-protector-guard-offset=@var{offset} -mprefixed -mno-prefixed @gol
> --mpcrel -mno-pcrel -mmma -mno-mmma}
> +-mpcrel -mno-pcrel -mmma -mno-mmma -mrop-protect -mno-rop-protect @gol
> +-mprivileged -mno-privileged}
> 
>  @emph{RX Options}
>  @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
> @@ -27019,7 +27020,8 @@ following options:
>  -mmulhw  -mdlmzb  -mmfpgpr  -mvsx @gol
>  -mcrypto  -mhtm  -mpower8-fusion  -mpower8-vector @gol
>  -mquad-memory  -mquad-memory-atomic  -mfloat128 @gol
> --mfloat128-hardware -mprefixed -mpcrel -mmma}
> +-mfloat128-hardware -mprefixed -mpcrel -mmma @gol
> +-mrop-protect -mprivileged}
> 
>  The particular options set for any particular CPU varies between
>  compiler versions, depending on what setting seems to produce optimal
> @@ -28024,6 +28026,19 @@ store instructions when the option 
> @option{-mcpu=future} is used.
>  Generate (do not generate) the MMA instructions when the option
>  @option{-mcpu=future} is used.
> 
> +@item -mrop-protect
> +@itemx -mno-rop-protect
> +@opindex mrop-protect
> +@opindex mno-rop-protect
> +Generate (do not generate) ROP protection instructions when the option
> +@option{-mcpu=power10} is used.

Is the option on by default?  If so, we may want another testcase to
verify ROP instructions are generated with just -mcpu=power10.
If not,
perhaps the "-mcpu=power10" reference here should instead be "-mrop-protect".


> +
> +@item -mprivileged
> +@itemx -mno-privileged
> +@opindex mprivileged
> +@opindex mno-privileged
> +Generate (do not generate) instructions for privileged state.
> +
>  @item -mblock-ops-unaligned-vsx
>  @itemx -mno-block-ops-unaligned-vsx
>  @opindex block-ops-unaligned-vsx


lgtm
thanks,
-Will




Re: [PATCH 0/4] [rs6000] ROP support

2021-04-26 Thread will schmidt via Gcc-patches
On Sun, 2021-04-25 at 20:50 -0500, Bill Schmidt via Gcc-patches wrote:
> Add POWER10 support for hashst[p] and hashchk[p] operations.  When
> the -mrop-protect option is selected, any function that loads the
> link
> register from memory before returning must have protection in the
> prologue and epilogue to ensure the link register save location has
> not been compromised.  If -mprivileged is also specified, the
> protection instructions generated require supervisor privilege.

Hi,

Is -mprivileged tied directly to ROP, or is it a 'generic' option?

As
is, it looks like it can be considered generic, so could be also used
for other cases where we would want to generate instructions that
require supervisor privilege.

Additional comments on the subsequent patches.. 
thanks
-Will

> 
> The patches are broken up into logical chunks:
>  - Option handling
>  - Instruction generation
>  - Predefined macro handling
>  - Test cases
> 
> Bootstrapped and tested on a POWER10 system with no regressions.
> Tests on a kernel that enables user-space ROP mitigation were
> successful.  Is this series ok for trunk?  I would also like to later
> backport these patches to GCC for the 11.2 release.
> 
> Thanks!
> Bill
> 
> Bill Schmidt (4):
>   rs6000: Add -mrop-protect and -mprivileged flags
>   rs6000: Emit ROP-protect instructions in prologue and epilogue
>   rs6000: Conditionally define __ROP_PROTECT__
>   rs6000: Add ROP tests
> 
>  gcc/config/rs6000/rs6000-c.c |  3 +
>  gcc/config/rs6000/rs6000-internal.h  |  2 +
>  gcc/config/rs6000/rs6000-logue.c | 86 +-
> --
>  gcc/config/rs6000/rs6000.c   |  7 ++
>  gcc/config/rs6000/rs6000.md  | 39 +++
>  gcc/config/rs6000/rs6000.opt |  6 ++
>  gcc/doc/invoke.texi  | 19 +-
>  gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 +
>  gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 +
>  gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 ++
>  gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 
>  gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
>  12 files changed, 231 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c
> 



[PATCH 0/4] [rs6000] ROP support

2021-04-25 Thread Bill Schmidt via Gcc-patches
Add POWER10 support for hashst[p] and hashchk[p] operations.  When
the -mrop-protect option is selected, any function that loads the link
register from memory before returning must have protection in the
prologue and epilogue to ensure the link register save location has
not been compromised.  If -mprivileged is also specified, the
protection instructions generated require supervisor privilege.

The patches are broken up into logical chunks:
 - Option handling
 - Instruction generation
 - Predefined macro handling
 - Test cases

Bootstrapped and tested on a POWER10 system with no regressions.
Tests on a kernel that enables user-space ROP mitigation were
successful.  Is this series ok for trunk?  I would also like to later
backport these patches to GCC for the 11.2 release.

Thanks!
Bill

Bill Schmidt (4):
  rs6000: Add -mrop-protect and -mprivileged flags
  rs6000: Emit ROP-protect instructions in prologue and epilogue
  rs6000: Conditionally define __ROP_PROTECT__
  rs6000: Add ROP tests

 gcc/config/rs6000/rs6000-c.c |  3 +
 gcc/config/rs6000/rs6000-internal.h  |  2 +
 gcc/config/rs6000/rs6000-logue.c | 86 +---
 gcc/config/rs6000/rs6000.c   |  7 ++
 gcc/config/rs6000/rs6000.md  | 39 +++
 gcc/config/rs6000/rs6000.opt |  6 ++
 gcc/doc/invoke.texi  | 19 +-
 gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 +
 gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 +
 gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 ++
 gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 
 gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
 12 files changed, 231 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c

-- 
2.27.0



[PATCH 4/4] rs6000: Add ROP tests

2021-04-25 Thread Bill Schmidt via Gcc-patches
2021-03-25  Bill Schmidt  

gcc/testsuite/
* gcc.target/powerpc/rop-1.c: New.
* gcc.target/powerpc/rop-2.c: New.
* gcc.target/powerpc/rop-3.c: New.
* gcc.target/powerpc/rop-4.c: New.
* gcc.target/powerpc/rop-5.c: New.
---
 gcc/testsuite/gcc.target/powerpc/rop-1.c | 16 
 gcc/testsuite/gcc.target/powerpc/rop-2.c | 16 
 gcc/testsuite/gcc.target/powerpc/rop-3.c | 19 +++
 gcc/testsuite/gcc.target/powerpc/rop-4.c | 14 ++
 gcc/testsuite/gcc.target/powerpc/rop-5.c | 17 +
 5 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/rop-5.c

diff --git a/gcc/testsuite/gcc.target/powerpc/rop-1.c 
b/gcc/testsuite/gcc.target/powerpc/rop-1.c
new file mode 100644
index 000..cf8e2b01dda
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that ROP-protect instructions are inserted when a
+   call is present.  */
+
+extern void foo (void);
+
+int bar ()
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler {\mhashst\M} } } */
+/* { dg-final { scan-assembler {\mhashchk\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-2.c 
b/gcc/testsuite/gcc.target/powerpc/rop-2.c
new file mode 100644
index 000..dde403b0ef5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect -mprivileged" } */
+
+/* Verify that privileged ROP-protect instructions are inserted when a
+   call is present.  */
+
+extern void foo (void);
+
+int bar ()
+{
+  foo ();
+  return 5;
+}
+
+/* { dg-final { scan-assembler {\mhashstp\M} } } */
+/* { dg-final { scan-assembler {\mhashchkp\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-3.c 
b/gcc/testsuite/gcc.target/powerpc/rop-3.c
new file mode 100644
index 000..054f94fda99
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-3.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that ROP-protect instructions execute correctly when a
+   call is present.  */
+
+void __attribute__((noinline)) foo ()
+{
+  asm ("");
+}
+
+int main ()
+{
+  foo ();
+  return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-4.c 
b/gcc/testsuite/gcc.target/powerpc/rop-4.c
new file mode 100644
index 000..e2be8b2c035
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that no ROP-protect instructions are inserted when no
+   call is present.  */
+
+
+int bar ()
+{
+  return 5;
+}
+
+/* { dg-final { scan-assembler-not {\mhashst\M} } } */
+/* { dg-final { scan-assembler-not {\mhashchk\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/rop-5.c 
b/gcc/testsuite/gcc.target/powerpc/rop-5.c
new file mode 100644
index 000..b759fa59979
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/rop-5.c
@@ -0,0 +1,17 @@
+/* { dg-do run { target { power10_hw } } } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-require-effective-target powerpc_elfv2 } */
+/* { dg-options "-O2 -mdejagnu-cpu=power10 -mrop-protect" } */
+
+/* Verify that __ROP_PROTECT__ is predefined for -mrop-protect.  */
+
+extern void abort (void);
+
+int main ()
+{
+#ifndef __ROP_PROTECT__
+  abort ();
+#endif
+  return 0;
+}
+
-- 
2.27.0



[PATCH 2/4] rs6000: Emit ROP-protect instructions in prologue and epilogue

2021-04-25 Thread Bill Schmidt via Gcc-patches
Insert the hashst and hashchk instructions when -mrop-protect has been
selected.  The encrypted save slot for ROP mitigation is placed
between the parameter save area and the alloca space (if any;
otherwise the local variable space).

Note that ROP-mitigation instructions are currently only provided for
the ELFv2 ABI.

2021-03-25  Bill Schmidt  

gcc/
* config/rs6000/rs6000-internal.h (rs6000_stack): Add
rop_check_save_offset and rop_check_size.
* config/rs6000/rs6000-logue.c (rs6000_stack_info): Compute
rop_check_size and rop_check_save_offset.
(debug_stack_info): Dump rop_check_save_offset and rop_check_size.
(rs6000_emit_prologue): Assert if WORLD_SAVE used with
-mrop-protect; emit hashst[p] in prologue; emit hashchk[p] in
epilogue.
* config/rs6000/rs6000.md (unspec): Add UNSPEC_HASHST[P] and
UNSPEC_HASHCHK[P].
(hashst): New define_insn.
(hashstp): Likewise.
(hashchk): Likewise.
(hashchkp): Likewise.
---
 gcc/config/rs6000/rs6000-internal.h |  2 +
 gcc/config/rs6000/rs6000-logue.c| 86 +
 gcc/config/rs6000/rs6000.md | 39 +
 3 files changed, 116 insertions(+), 11 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-internal.h 
b/gcc/config/rs6000/rs6000-internal.h
index 428a7861a98..8fc77ba6138 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -39,6 +39,7 @@ typedef struct rs6000_stack {
   int gp_save_offset;  /* offset to save GP regs from initial SP */
   int fp_save_offset;  /* offset to save FP regs from initial SP */
   int altivec_save_offset; /* offset to save AltiVec regs from initial SP 
*/
+  int rop_check_save_offset;   /* offset to save ROP check from initial SP */
   int lr_save_offset;  /* offset to save LR from initial SP */
   int cr_save_offset;  /* offset to save CR from initial SP */
   int vrsave_save_offset;  /* offset to save VRSAVE from initial SP */
@@ -53,6 +54,7 @@ typedef struct rs6000_stack {
   int gp_size; /* size of saved GP registers */
   int fp_size; /* size of saved FP registers */
   int altivec_size;/* size of saved AltiVec registers */
+  int rop_check_size;  /* size of ROP check slot */
   int cr_size; /* size to hold CR if not in fixed area */
   int vrsave_size; /* size to hold VRSAVE */
   int altivec_padding_size;/* size of altivec alignment padding */
diff --git a/gcc/config/rs6000/rs6000-logue.c b/gcc/config/rs6000/rs6000-logue.c
index b0ac183ceff..10cf7a2de93 100644
--- a/gcc/config/rs6000/rs6000-logue.c
+++ b/gcc/config/rs6000/rs6000-logue.c
@@ -595,19 +595,21 @@ rs6000_savres_strategy (rs6000_stack_t *info,
+---+
| Parameter save area (+padding*) (P)   |  32
+---+
-   | Alloca space (A)  |  32+P
+   | Optional ROP check slot (R)   |  32+P
+---+
-   | Local variable space (L)  |  32+P+A
+   | Alloca space (A)  |  32+P+R
+---+
-   | Save area for AltiVec registers (W)   |  32+P+A+L
+   | Local variable space (L)  |  32+P+R+A
+---+
-   | AltiVec alignment padding (Y) |  32+P+A+L+W
+   | Save area for AltiVec registers (W)   |  32+P+R+A+L
+---+
-   | Save area for GP registers (G)|  32+P+A+L+W+Y
+   | AltiVec alignment padding (Y) |  32+P+R+A+L+W
+---+
-   | Save area for FP registers (F)|  32+P+A+L+W+Y+G
+   | Save area for GP registers (G)|  32+P+R+A+L+W+Y
+---+
-   old SP->| back chain to caller's caller |  32+P+A+L+W+Y+G+F
+   | Save area for FP registers (F)|  32+P+R+A+L+W+Y+G
+   +---+
+   old SP->| back chain to caller's caller |  32+P+R+A+L+W+Y+G+F
+---+
 
  * If the alloca area is present, the parameter save area is
@@ -717,6 +719,19 @@ rs6000_stack_info (void)
   /* Does this function call anything (apart from sibling calls)?  */
   info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame);
 
+  if (TARGET_POWER10 && info->calls_p
+  && DEFAULT_ABI == ABI_ELFv2 && rs6000_rop_protect)
+info->rop_check_size = 8;
+  else if (rs6000_rop_protect && DEFAULT_ABI != ABI_ELFv2)
+{
+  /* We 

[PATCH 3/4] rs6000: Conditionally define __ROP_PROTECT__

2021-04-25 Thread Bill Schmidt via Gcc-patches
2021-03-25  Bill Schmidt  

gcc/
* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Define
__ROP_PROTECT__ if -mrop-protect is selected.
---
 gcc/config/rs6000/rs6000-c.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 0f8a629ff5a..afcb5bb6e39 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -602,6 +602,9 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT 
flags,
   /* Whether pc-relative code is being generated.  */
   if ((flags & OPTION_MASK_PCREL) != 0)
 rs6000_define_or_undefine_macro (define_p, "__PCREL__");
+  /* Tell the user -mrop-protect is in play.  */
+  if (rs6000_rop_protect)
+rs6000_define_or_undefine_macro (define_p, "__ROP_PROTECT__");
 }
 
 void
-- 
2.27.0



[PATCH 1/4] rs6000: Add -mrop-protect and -mprivileged flags

2021-04-25 Thread Bill Schmidt via Gcc-patches
2021-03-25  Bill Schmidt  

gcc/
* config/rs6000/rs6000.c (rs6000_option_override_internal):
Disable shrink wrap when inserting ROP-protect instructions.
* config/rs6000/rs6000.opt (mrop-protect): New option.
(mprivileged): Likewise.
* doc/invoke.texi: Document mrop-protect and mprivileged.
---
 gcc/config/rs6000/rs6000.c   |  7 +++
 gcc/config/rs6000/rs6000.opt |  6 ++
 gcc/doc/invoke.texi  | 19 +--
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 844fee88cf3..d13ed6e7ff4 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4036,6 +4036,13 @@ rs6000_option_override_internal (bool global_init_p)
   && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
 
+  /* If we are inserting ROP-protect instructions, disable shrink wrap.  */
+  if (rs6000_rop_protect)
+{
+  flag_shrink_wrap = 0;
+  flag_shrink_wrap_separate = 0;
+}
+
   /* If we can shrink-wrap the TOC register save separately, then use
  -msave-toc-indirect unless explicitly disabled.  */
   if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 0dbdf753673..d116fd12f7e 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -619,3 +619,9 @@ Generate (do not generate) MMA instructions.
 
 mrelative-jumptables
 Target Undocumented Var(rs6000_relative_jumptables) Init(1) Save
+
+mrop-protect
+Target Var(rs6000_rop_protect) Init(0)
+
+mprivileged
+Target Var(rs6000_privileged) Init(0)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e98b0962b9f..36bd0bf9b3b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1238,7 +1238,8 @@ See RS/6000 and PowerPC Options.
 -mgnu-attribute  -mno-gnu-attribute @gol
 -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol
 -mstack-protector-guard-offset=@var{offset} -mprefixed -mno-prefixed @gol
--mpcrel -mno-pcrel -mmma -mno-mmma}
+-mpcrel -mno-pcrel -mmma -mno-mma -mrop-protect -mno-rop-protect @gol
+-mprivileged -mno-privileged}
 
 @emph{RX Options}
 @gccoptlist{-m64bit-doubles  -m32bit-doubles  -fpu  -nofpu@gol
@@ -27019,7 +27020,8 @@ following options:
 -mmulhw  -mdlmzb  -mmfpgpr  -mvsx @gol
 -mcrypto  -mhtm  -mpower8-fusion  -mpower8-vector @gol
 -mquad-memory  -mquad-memory-atomic  -mfloat128 @gol
--mfloat128-hardware -mprefixed -mpcrel -mmma}
+-mfloat128-hardware -mprefixed -mpcrel -mmma @gol
+-mrop-protect -mprivileged}
 
 The particular options set for any particular CPU varies between
 compiler versions, depending on what setting seems to produce optimal
@@ -28024,6 +28026,19 @@ store instructions when the option 
@option{-mcpu=future} is used.
 Generate (do not generate) the MMA instructions when the option
 @option{-mcpu=future} is used.
 
+@item -mrop-protect
+@itemx -mno-rop-protect
+@opindex mrop-protect
+@opindex mno-rop-protect
+Generate (do not generate) ROP protection instructions when the option
+@option{-mcpu=power10} is used.
+
+@item -mprivileged
+@itemx -mno-privileged
+@opindex mprivileged
+@opindex mno-privileged
+Generate (do not generate) instructions for privileged state.
+
 @item -mblock-ops-unaligned-vsx
 @itemx -mno-block-ops-unaligned-vsx
 @opindex block-ops-unaligned-vsx
-- 
2.27.0



Re: [PATCH] Fix logic error in 32-bit trampolines, PR target/98952

2021-04-25 Thread Bill Schmidt via Gcc-patches

On 4/23/21 6:58 PM, Segher Boessenkool wrote:

On Fri, Apr 23, 2021 at 06:24:07PM -0400, Michael Meissner wrote:

On Thu, Apr 22, 2021 at 05:56:32PM -0500, Segher Boessenkool wrote:

As Will says, it looks like the ELFv2 version has the same bug.  Please
fix that the same way.

Yes it has the same bug.  However in practice it would never be hit, since this
bug is 32-bit, and we only build 64-bit systems with ELF v2.  I did fix it.

Hrm, in that case, why do we have that code at all?!


Okay for trunk.  Okay for backport to 11 when that branch opens again.
Does this need more backports?  (Those should follow after 11 of
course).

Bill mentioned we may want to backport this to earlier branches before they are
frozen.  Tulio, are backports to earlier revisions important?

Well, the bug has been there since the original commit to (then)
tramp.asm, which was 25 years ago, and only now people noticed ;-)

We should have a backport to GCC 11 at least.  Older is up to you (and
Tulio).
This was reported to us as a compatibility problem with Clang that was 
holding up porting a language runtime to Power.  Since this is very 
obviously a bug, I would like to be aggressive about backporting it to 
previous releases to avoid any other such problems.  Thanks for considering!


Bill




Segher


Re: [PATCH] doc: Update Power builtin documentation in user's manual

2021-04-16 Thread Bill Schmidt via Gcc-patches

On 4/16/21 8:56 AM, Bill Schmidt via Gcc-patches wrote:

The standard for many Power vector interfaces is now the recently
published Power Vector Intrinsic Programming Reference.  Reference
that document for the relevant interfaces, and remove redundant
information from the GCC user's manual.


Forgot to mention that I verified the PDF output.  Is this okay for 
trunk in stage 4?


Bill



2021-04-16  Bill Schmidt  

gcc/
* doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Revise
this section and its subsections.
---
  gcc/doc/extend.texi | 2200 +--
  1 file changed, 232 insertions(+), 1968 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1ddafb3ff2c..3260f0639d2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18165,14 +18165,216 @@ Internally, GCC uses built-in functions to achieve 
the functionality in
  the aforementioned header file, but they are not supported and are
  subject to change without notice.

-GCC complies with the OpenPOWER 64-Bit ELF V2 ABI Specification,
+GCC complies with the Power Vector Intrinsic Programming Reference (PVIPR),
  which may be found at
-@uref{https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture}.
-Appendix A of this document lists the vector API interfaces that must be
+@uref{https://openpowerfoundation.org/?resource_lib=power-vector-intrinsic-programming-reference}.
+Chapter 4 of this document fully documents the vector API interfaces
+that must be
  provided by compliant compilers.  Programmers should preferentially use
  the interfaces described therein.  However, historically GCC has provided
  additional interfaces for access to vector instructions.  These are
-briefly described below.
+briefly described below.  Where the PVIPR provides a portable interface,
+other functions in GCC that provide the same capabilities should be
+considered deprecated.
+
+The PVIPR documents the following overloaded functions:
+
+@multitable @columnfractions 0.33 0.33 0.33
+
+@item @code{vec_abs}
+@tab @code{vec_absd}
+@tab @code{vec_abss}
+@item @code{vec_add}
+@tab @code{vec_addc}
+@tab @code{vec_adde}
+@item @code{vec_addec}
+@tab @code{vec_adds}
+@tab @code{vec_all_eq}
+@item @code{vec_all_ge}
+@tab @code{vec_all_gt}
+@tab @code{vec_all_in}
+@item @code{vec_all_le}
+@tab @code{vec_all_lt}
+@tab @code{vec_all_nan}
+@item @code{vec_all_ne}
+@tab @code{vec_all_nge}
+@tab @code{vec_all_ngt}
+@item @code{vec_all_nle}
+@tab @code{vec_all_nlt}
+@tab @code{vec_all_numeric}
+@item @code{vec_and}
+@tab @code{vec_andc}
+@tab @code{vec_any_eq}
+@item @code{vec_any_ge}
+@tab @code{vec_any_gt}
+@tab @code{vec_any_le}
+@item @code{vec_any_lt}
+@tab @code{vec_any_nan}
+@tab @code{vec_any_ne}
+@item @code{vec_any_nge}
+@tab @code{vec_any_ngt}
+@tab @code{vec_any_nle}
+@item @code{vec_any_nlt}
+@tab @code{vec_any_numeric}
+@tab @code{vec_any_out}
+@item @code{vec_avg}
+@tab @code{vec_bperm}
+@tab @code{vec_ceil}
+@item @code{vec_cipher_be}
+@tab @code{vec_cipherlast_be}
+@tab @code{vec_cmpb}
+@item @code{vec_cmpeq}
+@tab @code{vec_cmpge}
+@tab @code{vec_cmpgt}
+@item @code{vec_cmple}
+@tab @code{vec_cmplt}
+@tab @code{vec_cmpne}
+@item @code{vec_cmpnez}
+@tab @code{vec_cntlz}
+@tab @code{vec_cntlz_lsbb}
+@item @code{vec_cnttz}
+@tab @code{vec_cnttz_lsbb}
+@tab @code{vec_cpsgn}
+@item @code{vec_ctf}
+@tab @code{vec_cts}
+@tab @code{vec_ctu}
+@item @code{vec_div}
+@tab @code{vec_double}
+@tab @code{vec_doublee}
+@item @code{vec_doubleh}
+@tab @code{vec_doublel}
+@tab @code{vec_doubleo}
+@item @code{vec_eqv}
+@tab @code{vec_expte}
+@tab @code{vec_extract}
+@item @code{vec_extract_exp}
+@tab @code{vec_extract_fp32_from_shorth}
+@tab @code{vec_extract_fp32_from_shortl}
+@item @code{vec_extract_sig}
+@tab @code{vec_extract_4b}
+@tab @code{vec_first_match_index}
+@item @code{vec_first_match_or_eos_index}
+@tab @code{vec_first_mismatch_index}
+@tab @code{vec_first_mismatch_or_eos_index}
+@item @code{vec_float}
+@tab @code{vec_float2}
+@tab @code{vec_floate}
+@item @code{vec_floato}
+@tab @code{vec_floor}
+@tab @code{vec_gb}
+@item @code{vec_insert}
+@tab @code{vec_insert_exp}
+@tab @code{vec_insert4b}
+@item @code{vec_ld}
+@tab @code{vec_lde}
+@tab @code{vec_ldl}
+@item @code{vec_loge}
+@tab @code{vec_madd}
+@tab @code{vec_madds}
+@item @code{vec_max}
+@tab @code{vec_mergee}
+@tab @code{vec_mergeh}
+@item @code{vec_mergel}
+@tab @code{vec_mergeo}
+@tab @code{vec_mfvscr}
+@item @code{vec_min}
+@tab @code{vec_mradds}
+@tab @code{vec_msub}
+@item @code{vec_msum}
+@tab @code{vec_msums}
+@tab @code{vec_mtvscr}
+@item @code{vec_mul}
+@tab @code{vec_mule}
+@tab @code{vec_mulo}
+@item @code{vec_nabs}
+@tab @code{vec_nand}
+@tab @code{vec_ncipher_be}
+@item @code{vec_ncipherlast_be}
+@tab @code{vec_nearbyint}
+@tab @code{vec_neg}
+@item @code{vec_nmadd}
+@tab @code{vec_nmsub}
+@tab @code{vec_nor}
+@item @code{vec_or}
+@tab @code{vec_orc}
+@tab @code{vec_pack}
+@item

[PATCH] doc: Update Power builtin documentation in user's manual

2021-04-16 Thread Bill Schmidt via Gcc-patches
The standard for many Power vector interfaces is now the recently
published Power Vector Intrinsic Programming Reference.  Reference
that document for the relevant interfaces, and remove redundant
information from the GCC user's manual.

2021-04-16  Bill Schmidt  

gcc/
* doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): Revise
this section and its subsections.
---
 gcc/doc/extend.texi | 2200 +--
 1 file changed, 232 insertions(+), 1968 deletions(-)

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1ddafb3ff2c..3260f0639d2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18165,14 +18165,216 @@ Internally, GCC uses built-in functions to achieve 
the functionality in
 the aforementioned header file, but they are not supported and are
 subject to change without notice.
 
-GCC complies with the OpenPOWER 64-Bit ELF V2 ABI Specification,
+GCC complies with the Power Vector Intrinsic Programming Reference (PVIPR),
 which may be found at
-@uref{https://openpowerfoundation.org/?resource_lib=64-bit-elf-v2-abi-specification-power-architecture}.
-Appendix A of this document lists the vector API interfaces that must be
+@uref{https://openpowerfoundation.org/?resource_lib=power-vector-intrinsic-programming-reference}.
+Chapter 4 of this document fully documents the vector API interfaces
+that must be
 provided by compliant compilers.  Programmers should preferentially use
 the interfaces described therein.  However, historically GCC has provided
 additional interfaces for access to vector instructions.  These are
-briefly described below.
+briefly described below.  Where the PVIPR provides a portable interface,
+other functions in GCC that provide the same capabilities should be
+considered deprecated.
+
+The PVIPR documents the following overloaded functions:
+
+@multitable @columnfractions 0.33 0.33 0.33
+
+@item @code{vec_abs}
+@tab @code{vec_absd}
+@tab @code{vec_abss}
+@item @code{vec_add}
+@tab @code{vec_addc}
+@tab @code{vec_adde}
+@item @code{vec_addec}
+@tab @code{vec_adds}
+@tab @code{vec_all_eq}
+@item @code{vec_all_ge}
+@tab @code{vec_all_gt}
+@tab @code{vec_all_in}
+@item @code{vec_all_le}
+@tab @code{vec_all_lt}
+@tab @code{vec_all_nan}
+@item @code{vec_all_ne}
+@tab @code{vec_all_nge}
+@tab @code{vec_all_ngt}
+@item @code{vec_all_nle}
+@tab @code{vec_all_nlt}
+@tab @code{vec_all_numeric}
+@item @code{vec_and}
+@tab @code{vec_andc}
+@tab @code{vec_any_eq}
+@item @code{vec_any_ge}
+@tab @code{vec_any_gt}
+@tab @code{vec_any_le}
+@item @code{vec_any_lt}
+@tab @code{vec_any_nan}
+@tab @code{vec_any_ne}
+@item @code{vec_any_nge}
+@tab @code{vec_any_ngt}
+@tab @code{vec_any_nle}
+@item @code{vec_any_nlt}
+@tab @code{vec_any_numeric}
+@tab @code{vec_any_out}
+@item @code{vec_avg}
+@tab @code{vec_bperm}
+@tab @code{vec_ceil}
+@item @code{vec_cipher_be}
+@tab @code{vec_cipherlast_be}
+@tab @code{vec_cmpb}
+@item @code{vec_cmpeq}
+@tab @code{vec_cmpge}
+@tab @code{vec_cmpgt}
+@item @code{vec_cmple}
+@tab @code{vec_cmplt}
+@tab @code{vec_cmpne}
+@item @code{vec_cmpnez}
+@tab @code{vec_cntlz}
+@tab @code{vec_cntlz_lsbb}
+@item @code{vec_cnttz}
+@tab @code{vec_cnttz_lsbb}
+@tab @code{vec_cpsgn}
+@item @code{vec_ctf}
+@tab @code{vec_cts}
+@tab @code{vec_ctu}
+@item @code{vec_div}
+@tab @code{vec_double}
+@tab @code{vec_doublee}
+@item @code{vec_doubleh}
+@tab @code{vec_doublel}
+@tab @code{vec_doubleo}
+@item @code{vec_eqv}
+@tab @code{vec_expte}
+@tab @code{vec_extract}
+@item @code{vec_extract_exp}
+@tab @code{vec_extract_fp32_from_shorth}
+@tab @code{vec_extract_fp32_from_shortl}
+@item @code{vec_extract_sig}
+@tab @code{vec_extract_4b}
+@tab @code{vec_first_match_index}
+@item @code{vec_first_match_or_eos_index}
+@tab @code{vec_first_mismatch_index}
+@tab @code{vec_first_mismatch_or_eos_index}
+@item @code{vec_float}
+@tab @code{vec_float2}
+@tab @code{vec_floate}
+@item @code{vec_floato}
+@tab @code{vec_floor}
+@tab @code{vec_gb}
+@item @code{vec_insert}
+@tab @code{vec_insert_exp}
+@tab @code{vec_insert4b}
+@item @code{vec_ld}
+@tab @code{vec_lde}
+@tab @code{vec_ldl}
+@item @code{vec_loge}
+@tab @code{vec_madd}
+@tab @code{vec_madds}
+@item @code{vec_max}
+@tab @code{vec_mergee}
+@tab @code{vec_mergeh}
+@item @code{vec_mergel}
+@tab @code{vec_mergeo}
+@tab @code{vec_mfvscr}
+@item @code{vec_min}
+@tab @code{vec_mradds}
+@tab @code{vec_msub}
+@item @code{vec_msum}
+@tab @code{vec_msums}
+@tab @code{vec_mtvscr}
+@item @code{vec_mul}
+@tab @code{vec_mule}
+@tab @code{vec_mulo}
+@item @code{vec_nabs}
+@tab @code{vec_nand}
+@tab @code{vec_ncipher_be}
+@item @code{vec_ncipherlast_be}
+@tab @code{vec_nearbyint}
+@tab @code{vec_neg}
+@item @code{vec_nmadd}
+@tab @code{vec_nmsub}
+@tab @code{vec_nor}
+@item @code{vec_or}
+@tab @code{vec_orc}
+@tab @code{vec_pack}
+@item @code{vec_pack_to_short_fp32}
+@tab @code{vec_packpx}
+@tab @code{vec_packs}
+@item @code{vec_packsu}
+@tab @code{vec_parity_lsbb}
+@tab @code{vec_perm}
+@item 

Re: [PATCH] Fix logic error in 32-bit trampolines, PR target/98952

2021-04-12 Thread will schmidt via Gcc-patches
On Fri, 2021-04-09 at 17:09 -0400, Michael Meissner wrote:
> Fix logic error in 32-bit trampolines, PR target/98952.
> 
> The test in the PowerPC 32-bit trampoline support is backwards.  It aborts
> if the trampoline size is greater than the expected size.  It should abort
> when the trampoline size is less than the expected size.
> 
> I verified this by creating a 32-bit trampoline program and manually
> changing the size of the trampoline to be 48 instead of 40.  The program
> aborted with the larger size.  I updated this code and ran the test again
> and it passed.
> 
> I did a bootstrap build on a big endian power8 system that supports both
> 32-bit and 64-bit executables, and there were no regressions.  Can I check
> this patch into the trunk?
> 
> libgcc/
> 2021-04-09  Michael Meissner  
> 
>   PR target/98952
>   * config/rs6000/tramp.S (__trampoline_setup): Fix trampoline size
>   comparison in 32-bit.
> ---
>  libgcc/config/rs6000/tramp.S | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/libgcc/config/rs6000/tramp.S b/libgcc/config/rs6000/tramp.S
> index 4236a82b402..6b61d892da6 100644
> --- a/libgcc/config/rs6000/tramp.S
> +++ b/libgcc/config/rs6000/tramp.S
> @@ -64,8 +64,7 @@ FUNC_START(__trampoline_setup)
>  mflr r11
>  addi r7,r11,trampoline_initial-4-.LCF0 /* trampoline address -4 */
> 
> - li  r8,trampoline_size  /* verify that the trampoline is big 
> enough */
> - cmpwcr1,r8,r4
> + cmpwi   cr1,r4,trampoline_size  /* verify that the trampoline is big 
> enough */


Hmm, I spent several minutes trying to determine how cmpw behaves
differently than cmpwi before noticing you also swapped the
order of the r4,r8 operands. 

That seems OK.

A statement in the description indicating that you used a cmpwi instead
of a cmpw since you were in the neighborhood would help call that out. 


The #elif  _CALL_ELF == 2  portion of tramp.S (line 159 or so) has a
similar compare stanza with respect to the order of operands on the
compare.  Will this also have a backwards greater-than less-than issue?

        li      r8,trampoline_size      /* verify that the trampoline is big enough */
        cmpw    cr1,r8,r4
        srwi    r4,r4,3                 /* # doublewords to move */
        addi    r9,r3,-8                /* adjust pointer for stdu */
        mtctr   r4
        blt     cr1,.Labort




thanks
-Will


>   srwir4,r4,2 /* # words to move */
>   addir9,r3,-4/* adjust pointer for lwzu */
>   mtctr   r4
> -- 
> 2.22.0
> 
> 



Re: [PATCH 2/2] Add IEEE 128-bit min/max support on PowerPC

2021-04-09 Thread will schmidt via Gcc-patches
On Fri, 2021-04-09 at 10:43 -0400, Michael Meissner wrote:
> Add IEEE 128-bit fp conditional move on PowerPC.
> 
> This patch has been posted various times in the past.  My memory is the last
> time I changed the patch, I addressed the concerns posted at that time.  Since
> then the patch seems to have gone into a limbo state.

This is covered in the patch series title page.  Don't distract from the
patch itself here.

> 
> This patch adds the support for power10 IEEE 128-bit floating point 
> conditional
> move and for automatically generating min/max.  Unlike the previous patch, I
> decided to keep two separate patterns for fpmask before splitting (one pattern
> for normal compares, and the other pattern for inverted compares).  I can go
> back to a single pattern with a new predicate that allows either comparison.

ok.

> 
> Compared to the original code, these patterns do simplify the fpmask insns to
> having one alternative instead of two.  In the original code, the first
> alternative tried to use the result as a temporary register.  But that doesn't
> work if you are doing a conditional move with SF/DF types, but the comparison
> is KF/TF.  That is because the SF/DF types can use the traditional FPR
> registers, but IEEE 128-bit floating point can only do arithmetic in the
> traditional Altivec registers.

ok.

> 
> This code also has to insert a XXPERMDI if you are moving KF/TF values, but
> the comparison is done with SF/DF values.  In this case, the set and compare
> mask for SF/DF clears the bottom 64-bits of the register, and the XXPERMDI is
> needed to fill it.

ok.

> 
> I have tested this patch in various little endian and big endian PowerPC 
> builds
> since I've posted.  It has no regressions, and it adds the instructions  if
> -mcpu=power10 is used.
> 
> gcc/
> 2021-04-09 Michael Meissner  
> 
> * config/rs6000/rs6000.c (have_compare_and_set_mask): Add IEEE
> 128-bit floating point types.

ok

> * config/rs6000/rs6000.md (FPMASK): New iterator.
> (FPMASK2): New iterator.

comment on this below.


> (Fv mode attribute): Add KFmode and TFmode.

ok


Missing an entry?  I'm not certain I've followed changelog versus code
accurately here.  May need an additional entry, something like:
(movcc): Replace SFDF with FPMASK



> (movcc_fpmask): Replace
> movcc_p9.  Add IEEE 128-bit fp support.
> (movcc_invert_fpmask): Replace
> movcc_invert_p9.  Add IEEE 128-bit fp
> support.
> (fpmask): Add IEEE 128-bit fp support.  Enable generator to
> build the RTL.
> (xxsel): Add IEEE 128-bit fp support.  Enable generator to
> build the RTL.
ok


> 
> gcc/testsuite/
> 2021-04-09  Michael Meissner  
> 
> * gcc.target/powerpc/float128-cmove.c: New test.
> * gcc.target/powerpc/float128-minmax-3.c: New test.
> ---
>  gcc/config/rs6000/rs6000.c|   8 +-
>  gcc/config/rs6000/rs6000.md   | 186 --
>  .../gcc.target/powerpc/float128-cmove.c   |  93 +
>  .../gcc.target/powerpc/float128-minmax-3.c|  15 ++
>  4 files changed, 236 insertions(+), 66 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-cmove.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-3.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index e87686c1c4d..ad0d83f6d3f 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -15706,8 +15706,8 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, 
> rtx op_false,
>return 1;
>  }
> 
> -/* Possibly emit the xsmaxcdp and xsmincdp instructions to emit a maximum or
> -   minimum with "C" semantics.
> +/* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to emit a
> +   maximum or minimum with "C" semantics.
> 
> Unless you use -ffast-math, you can't use these instructions to replace
> conditions that implicitly reverse the condition because the comparison
> @@ -15843,6 +15843,10 @@ have_compare_and_set_mask (machine_mode mode)
>  case E_DFmode:
>return TARGET_P9_MINMAX;
> 
> +case E_KFmode:
> +case E_TFmode:
> +  return FLOAT128_MIN_MAX_FPMASK_P (mode);
> +
>  default:
>break;
>  }
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 17b2fdc1cdd..ca4a4d01f05 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -575,6 +575,19 @@ (define_mode_iterator SFDF [SF DF])
>  ; And again, for when we need two FP modes in a pattern.
>  (define_mode_iterator SFDF2 [SF DF])
> 
> +; Floating scalars that support the set compare mask instruction.
> +(define_mode_iterator FPMASK [SF
> +   DF
> +   (KF "FLOAT128_MIN_MAX_FPMASK_P (KFmode)")
> +   (TF "FLOAT128_MIN_MAX_FPMASK_P (TFmode)")])
> +
> +; And again, for patterns that need two (potentially) 

Re: [PATCH 1/2] Add IEEE 128-bit min/max support on PowerPC

2021-04-09 Thread will schmidt via Gcc-patches
On Fri, 2021-04-09 at 10:42 -0400, Michael Meissner wrote:
> Add IEEE 128-bit min/max support on PowerPC.
> 
> This patch has been posted various times in the past.  My memory is the last
> time I changed the patch, I addressed the concerns posted at that time.  Since
> then the patch seems to have gone into a limbo state.

Hi,

I'll throw some comments at this below, and see if it will trigger more
follow-up. 

> 
> This patch adds the support for the IEEE 128-bit floating point C minimum and
> maximum instructions.  The next patch will add the support for using the
> compare and set mask instruction to implement conditional moves.
> 
> Rather than trying to overload the current SF/DF min/max support, it was
> simpler to just provide the new instructions as a separate insn.
> 
> I have tested this patch in various little endian and big endian PowerPC 
> builds
> since I've posted.  It has no regressions, and it adds the instructions  if
> -mcpu=power10 is used.
> 
> gcc/
> 2021-04-09  Michael Meissner  
> 
>   * config/rs6000/rs6000.c (rs6000_emit_minmax): Add support for ISA
>   3.1 IEEE 128-bit floating point xsmaxcqp and xsmincqp instructions.

I don't see any direct reference to xsmaxcqp or xsmincqp with respect
to this change below. 

It looks like this change adds the FLOAT128_MIN_MAX_FPMASK_P (mode)
check
as criteria for emitting some form of a SET instruction. 
   emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));

Ok, I see it now,  the instructions are mildly obfuscated by
"xs<minmax>cqp" as part of the rs6000.md change below.




>   * config/rs6000/rs6000.h (FLOAT128_MIN_MAX_FPMASK_P): New macro.

which is
#define FLOAT128_MIN_MAX_FPMASK_P(MODE) \
  (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (MODE))

Are there any non MIN_MAX scenarios that will require the combination
of POWER10,FLOAT128_HW,FLOAT128_IEEE(mode)?  I'd wonder if there is a name
not specific to *_MIN_MAX_* that would be a better naming choice.
But, naming is hard. :-)


>   * config/rs6000/rs6000.md (s<minmax><mode>3): Add support for the
>   ISA 3.1 IEEE 128-bit minimum and maximum instructions.

I'd move the "xsmaxcqp,xsmincqp" instruction references from the rs6000.c 
changelog blurb to this changelog blurb.

I've looked over the rest, no further relevant comments below.
thanks
-Will

> 
> gcc/testsuite/
> 2021-04-09  Michael Meissner  
> 
>   * gcc.target/powerpc/float128-minmax-2.c: New test.
> ---
>  gcc/config/rs6000/rs6000.c|  3 ++-
>  gcc/config/rs6000/rs6000.h|  5 +
>  gcc/config/rs6000/rs6000.md   | 11 +++
>  .../gcc.target/powerpc/float128-minmax-2.c| 15 +++
>  4 files changed, 33 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-minmax-2.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 35f5c332c41..e87686c1c4d 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -16111,7 +16111,8 @@ rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx 
> op0, rtx op1)
>/* VSX/altivec have direct min/max insns.  */
>if ((code == SMAX || code == SMIN)
>&& (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
> -   || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode
> +   || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
> +   || FLOAT128_MIN_MAX_FPMASK_P (mode)))
>  {
>emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
>return;

ok


> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index 233a92baf3c..e3fb0798622 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -345,6 +345,11 @@ extern const char *host_detect_local_cpu (int argc, 
> const char **argv);
> || ((MODE) == TDmode) \
> || (!TARGET_FLOAT128_TYPE && FLOAT128_IEEE_P (MODE)))
> 
> +/* Macro whether the float128 minimum, maximum, and set compare mask
> +   instructions are enabled.  */
> +#define FLOAT128_MIN_MAX_FPMASK_P(MODE)  
> \
> +  (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (MODE))
> +
>  /* Return true for floating point that does not use a vector register.  */
>  #define SCALAR_FLOAT_MODE_NOT_VECTOR_P(MODE) \
>(SCALAR_FLOAT_MODE_P (MODE) && !FLOAT128_VECTOR_P (MODE))


ok

> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index c8cdc42533c..17b2fdc1cdd 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -5194,6 +5194,17 @@ (define_insn "*s3_vsx"
>  }
>[(set_attr "type" "fp")])
> 
> +;; Min/max for ISA 3.1 IEEE 128-bit floating point
> +(define_insn "s3"
> +  [(set (match_operand:IEEE128 0 "altivec_register_operand" "=v")
> + (fp_minmax:IEEE128
> +  (match_operand:IEEE128 1 "altivec_register_operand" "v")
> +  

Re: [PATCH 1/2, rs6000] Add const_anchor for rs6000 [PR33699]

2021-03-18 Thread will schmidt via Gcc-patches
On Thu, 2021-03-18 at 09:21 +0800, HAO CHEN GUI wrote:
> David & Segher,
> 
> Thanks so much for your explanation. My patch wants to enable the 
> constant anchor on rs6000 as TARGET_ANCHOR_CONST or targetm.anchor_const 
> is undefined. I realized that we have addi and addis instructions. So 
> the range of the offset could be a 32 bit constant.
> 
> I put a test case at 
> https://github.ibm.com/wschmidt/power-gcc/issues/1042#issuecomment-28922825. 
> It shows how anchor_const can improve asm output. With anchor_const, the 
> second complex constant loading can be eliminated by cse if it is within 
> the range of the first one.

I think about 99.9% of the community won't be able to reach that link. 
If progress on this issue requires additional eyes on the testcase you
may need to provide the test case here.

Thanks
-Will


> 
>Thanks again and looking forward to your advice.
> 
> On 18/3/2021 上午 8:57, David Edelsohn wrote:
> > On Wed, Mar 17, 2021 at 8:26 PM Segher Boessenkool
> >  wrote:
> > > Hi!
> > > 
> > > On Wed, Mar 17, 2021 at 03:35:30PM -0400, David Edelsohn wrote:
> > > > I disagree with your new definitions and I disagree with the manner in
> > > > which you are trying to change the values.
> > > 
> > > Yes.
> > > 
> > > > Your patch is NOT okay without a lot more explanation and justification.
> > > 
> > > Which is why I said:
> > > 
> > > > > > 1) This isn't suitable for stage 4.
> > > 
> > > You give a lot more reasons to not want it, but that was enough for me.
> > > 
> > > > > > 2) Please add a test case, which shows what it does, that it is 
> > > > > > useful.
> > > 
> > > I meant there is no way we can accept this patch if we aren't shown what
> > > it does, and that that is a good thing.
> > > 
> > > > > > 3) Does this work on other OSes than Linux?  What about Darwin and 
> > > > > > AIX?
> > > 
> > > And here I meant that there is no way we can accept patches that
> > > influence code generation on all platforms when we have no idea what it
> > > does on most platforms.  I did not intend to suggest the patch would be
> > > more acceptable if it was tested on other platforms; I wanted to say it
> > > is not acceptable if it is not.
> > > 
> > > The main issue is 2).  We need to understand what problem this patch is
> > > trying to solve.  I'm sure Hao Chen had a reason for doing this patch,
> > > so I'd like to know what it is trying to achieve, what it is trying to
> > > improve!
> > 
> > Investigating this with Segher, I believe that there is some confusion
> > about the "ANCHOR" macros.
> > 
> > TARGET_MIN_ANCHOR_OFFSET and TARGET_MAX_ANCHOR_OFFSET are not related
> > to TARGET_ANCHOR_CONST.
> > 
> > Also, TARGET_ANCHOR_CONST can be defined as a macro to trigger the
> > hook, and doesn't need targetm.anchor_const.
> > 
> > Any change to TARGET_ANCHOR_CONST requires extensive performance
> > testing.  Yes, it presumably fixes the testcase, but the impact on
> > overall performance is the critical question.
> > 
> > Thanks, David



Re: [PATCH, rs6000 V2] Update "prefix" attribute for Power10 [PR99133]

2021-03-18 Thread will schmidt via Gcc-patches
On Wed, 2021-03-17 at 15:49 -0500, Pat Haugen via Gcc-patches wrote:
> Update prefixed attribute for Power10.
> 
> This patch creates a new attribute, prepend_prefixed_insn, which is
> used to mark
> those instructions that are prefixed and need to have a 'p' prepended
> to their
> mnemonic at asm emit time. The existing "prefix" attribute is now
> used to mark
> all instructions that are prefixed form.
> 
> Bootstrap/regtest on powerpc64le (Power10) and powerpc64 (Power8
> 32/64) with no
> new regressions. Ok for trunk?
> 
> -Pat
> 
> 
> 2021-03-17  Pat Haugen  
> 
> gcc/
>   PR target/99133
>   * config/rs6000/altivec.md (xxspltiw_v4si, xxspltiw_v4sf_inst,
>   xxspltidp_v2df_inst, xxsplti32dx_v4si_inst,
> xxsplti32dx_v4sf_inst,
>   xxblend_, xxpermx_inst, xxeval): Mark prefixed.
>   * config/rs6000/mma.md (mma_, mma_,
>   mma_, mma_, mma_, mma_,
>   mma_, mma_, mma_, mma_):
>   Likewise.
>   * config/rs6000/pcrel-opt.md: Adjust attribute name.
>   * config/rs6000/rs6000.c (rs6000_final_prescan_insn): Adjust
> test. 
>   * config/rs6000/rs6000.md (define_attr
> "prepend_prefixed_insn"): New.
>   (define_attr "prefixed"): Update initializer.
>   (*tls_gd_pcrel, *tls_ld_pcrel, tls_dtprel_,
>   tls_tprel_, *tls_got_tprel_pcrel_,
> *pcrel_local_addr,
>   *pcrel_extern_addr, stack_protect_setdi, stack_protect_testdi):
>   Adjust attribute name.
>   * config/rs6000/sync.md (load_quadpti, store_quadpti):
> Likewise.
> 
> 


Changelog matches patch contents.  (ok!) :-)

Per this change:

+;; Whether an insn is a prefixed insn.  A prefixed instruction has a prefix
+;; instruction word that conveys additional information such as a larger
+;; immediate, additional operands, etc., in addition to the normal instruction
+;; word.  The default "length" attribute will also be adjusted by default to
+;; be 12 bytes.
+(define_attr "prefixed" "no,yes"
+  (if_then_else (eq_attr "prepend_prefixed_insn" "yes")
+   (const_string "yes")
+   (const_string "no")))


.. it looks like at least most of the users of the "prefixed" attribute have
been switched over to use "prepend_prefixed_insn" instead.   Are there still
users of the "prefixed" attribute remaining ?  I'm guessing so, given context,
but can't tell for certain.

(Just a question, not a specific request for a change)

lgtm

thanks
-Will







Re: [PATCH 1/2, rs6000] Add const_anchor for rs6000 [PR33699]

2021-03-16 Thread will schmidt via Gcc-patches
On Mon, 2021-03-15 at 11:11 +0800, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
> 
>  This patch adds const_anchor for rs6000. The const_anchor is
> used 
> in cse pass.
> 
>  The attachments are the patch diff and change log file.
> 
>  Bootstrapped and tested on powerpc64le with no regressions. Is
> this 
> okay for trunk? Any  recommendations? Thanks a lot.
> 


> * config/rs6000/rs6000.c (rs6000_option_override_internal): Set
> targetm.const_anchor, targetm.min_anchor_offset
> and targetm.max_anchor_offset.


Part two of my review (I missed this the first time).  :-)

Some variation of "PR target/33699" should be included as part of the
changelog blurb.


Thanks
-WIll



Re: [PATCH 1/2, rs6000] Add const_anchor for rs6000 [PR33699]

2021-03-16 Thread will schmidt via Gcc-patches
On Mon, 2021-03-15 at 11:11 +0800, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
> 
>  This patch adds const_anchor for rs6000. The const_anchor is used 
> in cse pass.
> 
>  The attachments are the patch diff and change log file.
> 
>  Bootstrapped and tested on powerpc64le with no regressions. Is this 
> okay for trunk? Any  recommendations? Thanks a lot.


Be sure to CC David and Segher to help ensure they see the arch
specific patch. :-) 
> 

> * config/rs6000/rs6000.c (rs6000_option_override_internal): Set
> targetm.const_anchor, targetm.min_anchor_offset
> and targetm.max_anchor_offset.


lgtm.


> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index ec068c58aa5..2b2350c53ae 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4911,6 +4911,13 @@ rs6000_option_override_internal (bool global_init_p)
>  warning (0, "%qs is deprecated and not recommended in any circumstances",
>"-mno-speculate-indirect-jumps");
>  
> +  if (TARGET_64BIT)
> +{
> +  targetm.min_anchor_offset = -32768;
> +  targetm.max_anchor_offset = 32767;
> +  targetm.const_anchor = 0x8000;
> +}
> +


The mix of decimal and hexadecimal notation catches my eye, but 
this matches the style I see for other architectures, mips in
particular.

Do we want/need to explicitly set the values for !TARGET_64BIT ?   (I
can't immediately tell what the default values are).

lgtm.


>return ret;
>  }
>  



Re: [PATCH] rs6000: Use rldimi for vec init instead of shift + ior

2021-02-18 Thread will schmidt via Gcc-patches
On Wed, 2021-02-03 at 14:37 +0800, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 

Hi,


> This patch merges the previously approved one[1] and its relied patch


I don't see the review for [1] in the archives.  


> made by Segher here[2], it's to make unsigned int vector init go with
> rldimi to merge two integers instead of shift and ior.
> 
> Segher's patch in [2] is required to make the test case pass,
> otherwise the costing for new pseudo-to-pseudo copies and the folding
> with nonzero_bits in combine will make the rl*imi pattern become
> compact and split into ior and shift unexpectedly.
> 
> The commit log of Segher's patch describes it in more details:
> 
> "An rl*imi is usually written as an IOR of an ASHIFT or similar, and an
> AND of a register with a constant mask.  In some cases combine knows
> that that AND doesn't do anything (because all zero bits in that mask
> correspond to bits known to be already zero), and then no pattern
> matches.  This patch adds a define_split for such cases.  It uses
> nonzero_bits in the condition of the splitter, but does not need it
> afterwards for the instruction to be recognised.  This is necessary
> because later passes can see fewer nonzero_bits.
> 
> Because it is a splitter, combine will only use it when starting with
> three insns (or more), even though the result is just one.  This isn't
> a huge problem in practice, but some possible combinations still won't
> happen."
> 
> Bootstrapped/regtested on powerpc64le-linux-gnu P9 and
> powerpc64-linux-gnu P8, also SPEC2017 build/run passed on P9.
> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> 
> [1] https://gcc.gnu.org/pipermail/gcc-patches/2020-December/562407.html
> [2] https://gcc.gnu.org/pipermail/gcc-patches/2021-January/563526.html
> 
> 
> gcc/ChangeLog:
> 
> 2020-02-03  Segher Boessenkool  
>   Kewen Lin  
> 
>   * config/rs6000/rs6000.md (*rotl3_insert_3): Renamed to...
>   (rotl3_insert_3): ...this.

ok

>   (plus_ior_xor): New code_iterator.
>   (define_split for GPR rl*imi): New splitter.

As described above, these two changes appear to be identical to what was
posted by Segher in [2].

(
>   * config/rs6000/vsx.md (vsx_init_v4si): Use gen_rotldi3_insert_3
>   for integer merging.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/vec-init-10.c: New test.

+/* { dg-final { scan-assembler-not "or" } } */

As is, it looks OK to me.  Per other reviews I've gotten, you may
get a request to wrap the "or" with \m and \M.
Some existing cases with
scan-assembler-not have a leading whitespace/tab qualifier too. 
i.e.
/* { dg-final { scan-assembler-not "\[ \t\]or "  } } */

Thanks,
-Will

> 
> -
> 



Re: [PATCH] rs6000: Convert the vector element register to SImode [PR98914]

2021-02-18 Thread will schmidt via Gcc-patches
On Wed, 2021-02-03 at 03:01 -0600, Xionghu Luo via Gcc-patches wrote:

Hi,

> v[k] will also be expanded to IFN VEC_SET if k is long type when
> built
> with -Og.  -O0 didn't exposed the issue due to v is TREE_ADDRESSABLE,
> -O1 and above also didn't capture it because of v[k] is not optimized
> to
> VIEW_CONVERT_EXPR(v)[k_1].
> vec_insert defines the element argument type to be signed int by
> ELFv2
> ABI, so convert it to SImode if it wasn't for Power target
> requirements.

The intro paragraph seems to start mid sentence.  Did something get cut
off?
The description here is specific to the reported testcase failure. 
This should describe the patch behavior instead.  Something like 
"When
expanding a vector with a variable rtx, the rtx type needs to be SI"
...
(I defer to any other suggestions of better or improved wording).


> 
> gcc/ChangeLog:


Reference "PR target/98914" somewhere in here.


> 
> 2021-02-03  Xionghu Luo  
> 
>   * config/rs6000/rs6000.c (rs6000_expand_vector_set): Convert
>   elt_rtx to SImode if it wasn't.

s/if it wasn't//


> 
> gcc/testsuite/ChangeLog:
> 
> 2021-02-03  Xionghu Luo  
> 
>   * gcc.target/powerpc/pr98914.c: New test.
> ---
>  gcc/config/rs6000/rs6000.c | 17 ++---
>  gcc/testsuite/gcc.target/powerpc/pr98914.c | 11 +++
>  2 files changed, 21 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr98914.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index ec068c58aa5..9f7f8da56c6 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -7000,8 +7000,6 @@ rs6000_expand_vector_set_var_p9 (rtx target,
> rtx val, rtx idx)
> 
>gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
> 
> -  gcc_assert (GET_MODE (idx) == E_SImode);
> -
>machine_mode inner_mode = GET_MODE (val);
> 
>rtx tmp = gen_reg_rtx (GET_MODE (idx));


This needs a changelog blurb.


> @@ -7047,8 +7045,6 @@ rs6000_expand_vector_set_var_p8 (rtx target,
> rtx val, rtx idx)
> 
>gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
> 
> -  gcc_assert (GET_MODE (idx) == E_SImode);
> -
>machine_mode inner_mode = GET_MODE (val);
>HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);


Same.

> 
> @@ -7144,7 +7140,7 @@ rs6000_expand_vector_set (rtx target, rtx val,
> rtx elt_rtx)
>machine_mode mode = GET_MODE (target);
>machine_mode inner_mode = GET_MODE_INNER (mode);
>rtx reg = gen_reg_rtx (mode);
> -  rtx mask, mem, x;
> +  rtx mask, mem, x, elt_si;
>int width = GET_MODE_SIZE (inner_mode);
>int i;
> 
> @@ -7154,16 +7150,23 @@ rs6000_expand_vector_set (rtx target, rtx
> val, rtx elt_rtx)
>  {
>if (!CONST_INT_P (elt_rtx))
>   {
> +   /* elt_rtx should be SImode from ELFv2 ABI.  */
> +   elt_si = gen_reg_rtx (E_SImode);
> +   if (GET_MODE (elt_rtx) != E_SImode)
> + convert_move (elt_si, elt_rtx, 0);
> +   else
> + elt_si = elt_rtx;
> +

ok.



> /* For V2DI/V2DF, could leverage the P9 version to generate
> xxpermdi
>when elt_rtx is variable.  */
> if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
>   {
> -   rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
> +   rs6000_expand_vector_set_var_p9 (target, val, elt_si);
> return;
>   }
> else if (TARGET_P8_VECTOR && TARGET_DIRECT_MOVE_64BIT)
>   {
> -   rs6000_expand_vector_set_var_p8 (target, val, elt_rtx);
> +   rs6000_expand_vector_set_var_p8 (target, val, elt_si);
> return;
>   }
>   }
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr98914.c
> b/gcc/testsuite/gcc.target/powerpc/pr98914.c
> new file mode 100644
> index 000..e4d78e3e6b3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr98914.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target powerpc_p8vector_ok } */
> +/* { dg-options "-Og -mvsx" } */
> +
> +vector int
> +foo (vector int v)
> +{
> +  for (long k = 0; k < 1; ++k)
> +v[k] = 0;
> +  return v;
> +}
ok

thanks
-Will



Re: [PATCH, rs6000] Optimization for PowerPC 64bit constant generation [PR94395]

2021-01-29 Thread will schmidt via Gcc-patches
On Fri, 2021-01-29 at 11:11 +0800, HAO CHEN GUI via Gcc-patches wrote:
> Hi,
> 

Hi,
  just a couple cosmetic nits below.
Thanks,


> This patch tries to optimize PowerPC 64 bit constant generation
> when 
> the constant can be transformed from a 32 bit or 16 bit constant by 
> rotating, shifting and mask AND.

Presumably this *does* optimize the constant generation. :-) 
s/tries to optimize/optimizes/




> 
> The attachments are the patch diff file and change log file.
> 
> Bootstrapped and tested on powerpc64le with no regressions. Is
> this 
> okay for trunk? Any  recommendations? Thanks a lot.
>

 

>PR target/94395
>* config/rs6000/rs6000.c (rs6000_emit_set_32bit_const,
>rs6000_rotate_long_const, rs6000_peel_long_const): New functions.
>(rs6000_emit_set_long_const, num_insns_constant_gpr): Call new
>functions.
>* testsuite/gcc.target/powerpc/pr94395.c: New test.


ok



On Fri, 2021-01-29 at 11:11 +0800, HAO CHEN GUI via Gcc-patches wrote:
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index f26fc13484b..bcb867ffe94 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1109,6 +1109,9 @@ static tree rs6000_handle_longcall_attribute (tree *, 
> tree, tree, int, bool *);
>  static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool 
> *);
>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
> +static HOST_WIDE_INT rs6000_rotate_long_const (unsigned HOST_WIDE_INT, int 
> *);
> +static HOST_WIDE_INT rs6000_peel_long_const (unsigned HOST_WIDE_INT, int *,
> +  int *);
>  static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, 
> bool);
> @@ -5868,12 +5871,28 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>  
>else if (TARGET_POWERPC64)
>  {
> +  int rotate, head, tail;
> +  HOST_WIDE_INT imm1, imm2;
> +  unsigned HOST_WIDE_INT uc = value;
>HOST_WIDE_INT low  = ((value & 0x) ^ 0x8000) - 0x8000;
>HOST_WIDE_INT high = value >> 31;
>  
>if (high == 0 || high == -1)
>   return 2;
>  
> +  /* A long constant can be transformed from both a 16 bit constant and
> +  a 32 bit constant. So we first test imm1 and imm2 if they're 16
> +  bit.  */
> +  imm1 = rs6000_rotate_long_const (uc, );
> +  if (SIGNED_INTEGER_16BIT_P (imm1))
> + return 2;
> +  imm2 = rs6000_peel_long_const (uc, , );
> +  if (SIGNED_INTEGER_16BIT_P (imm2))
> + return 2;
> +  if (SIGNED_INTEGER_NBIT_P (imm1, 32)
> +   || SIGNED_INTEGER_NBIT_P (imm2, 32))
> + return 3;
> +

Ok.


>high >>= 1;
>  
>if (low == 0 || low == high)
> @@ -9720,6 +9739,96 @@ rs6000_emit_set_const (rtx dest, rtx source)
>return true;
>  }
>  
> +/* Function to load 32 a bit constant.  */
> +static void
> +rs6000_emit_set_32bit_const (rtx dest, HOST_WIDE_INT c)
> +{
> +  gcc_assert (SIGNED_INTEGER_NBIT_P (c, 32));
> +
> +  rtx temp = can_create_pseudo_p () ? gen_reg_rtx (DImode) : dest;
> +
> +  if (SIGNED_INTEGER_16BIT_P (c))
> +emit_insn (gen_rtx_SET (dest, GEN_INT (c)));
> +  else
> +{
> +  emit_insn (gen_rtx_SET (copy_rtx (temp),
> +  GEN_INT (c & ~(HOST_WIDE_INT) 0x)));
> +  emit_insn (gen_rtx_SET (dest,
> +   gen_rtx_IOR (DImode, copy_rtx (temp),
> +GEN_INT (c & 0x;
> +}
> +}

ok


> +
> +/* Helper function of rs6000_emit_set_long_const to left rotate a long
> +   constant. It returns the result immediately when it finds a 32 bit
> +   constant. It at most rotates for 31 bits.
> +   For instant, the constant 0x1234 can be transformed to
> +   a 32 bit constant 0x4123 by left rotating 12 bits.  */
> +static HOST_WIDE_INT
> +rs6000_rotate_long_const (unsigned HOST_WIDE_INT c, int *rot)
> +{
> +  int bitsize = GET_MODE_BITSIZE (DImode);
> +  bool found = false;
> +  unsigned HOST_WIDE_INT imm = c;
> +  unsigned HOST_WIDE_INT m = imm >> (bitsize - 1);
> +  int rotate = 0;
> +
> +  while (rotate < 31 && !found)
> +{
> +  imm = imm << 1 | m;
> +  if (clz_hwi (imm) > 32 || clz_hwi (~imm) > 32)
> + found = true;
> +  rotate++;
> +  m = imm >> (bitsize - 1);
> +}
> +
> +  *rot = rotate;
> +  return imm;
> +}

ok.

> +
> +/* Helper function of rs6000_emit_set_long_const to reutrn a constant by

return

> +   removing consecutive 0s and 1s at the head and tail then setting all high
> +   bits.
> +   For instance, 0x00fff2345000 can be transformed to 0xfff2345 by
> +   peeling the head and tail,  then to 0x234 by setting all
> +   high bits.

You lost a 

[committed] [rs6000] Fix typo in gcc.target/pr91903.c dg-require stanza

2021-01-29 Thread will schmidt via Gcc-patches
[PATCH, rs6000] Fix typo in gcc.target/pr91903.c dg-require stanza

Hi,
  I somehow messed up when I tested this change..  Committed as obvious, also 
had pre-approval blessing per offline discussion.


Fix obvious typo in the testcase's dg-require stanza.

2021-01-29  Will Schmidt 

testsuite/ChangeLog:
* gcc.target/powerpc/pr91903.c: Fix dg-require stanza.

diff --git a/gcc/testsuite/gcc.target/powerpc/pr91903.c 
b/gcc/testsuite/gcc.target/powerpc/pr91903.c
index efd217e..3045d07 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr91903.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr91903.c
@@ -1,5 +1,5 @@
 /* { dg-do compile */
-/* { dg-require-effective-target p8vector_ok } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
 /* { dg-options "-mdejagnu-cpu=power8" } */
 
 #include 



Re: [PATCH] Add conversions between _Float128 and Decimal.

2021-01-29 Thread will schmidt via Gcc-patches
On Thu, 2021-01-28 at 21:42 -0500, Michael Meissner via Gcc-patches wrote:
> [PATCH] Add conversions between _Float128 and Decimal.
> 

Hi, 
Just a couple cosmetic nits in the description.  The changelog seems to
match the patch contents OK.


> This patch implements conversions between _Float128 and the 3 Decimal
> floating types.  It does by extendending the dfp-bit conversions to add a

nit: 'does so' or 'does this' 

> new binary floating point type (KF), and doing the conversions in the same
> mannor as the other binary/decimal conversions.

manner.

> 
> In particular for conversions from _Float128 to Decimal, it uses a sprintf
> variant to convert _Float128 to strings, and a type specific function that
> converts the string output to the appropriate Decimal type
> 
> For conversions from one of the Decimal types to _Float128, it uses a decimal
> function to convert to string (i.e. __decimalToString), and then uses a
> variant of strtold to convert to _Float128.


Are the sprintf and strtold functions called actually variants? 


> 
> If the user is linked against GLIBC 2.32 or newer, then the sprintf and 
> strtold
> variant functions can use the features directly in GLIBC 2.32 to do this
> conversion.
> 
> If you have an older GLIBC and want to convert _Float128 to one of the Decimal
> types, it will convert the _Float128 to __ibm128 and then convert that to
> Decimal.
> 
> Similarly if you have one of the Decimal types, and want to convert to
> _Float128, it will first convert the Decimal type to __ibm128, and then 
> convert
> __ibm128 to _Float128.
> 
> These functions will primarily be used if/when the default PowerPC long double
> type is changed to IEEE 128-bit, but they could also be used if the user
> explicitly converts _Float128 to/from a Decimal type.
> 
> One test case relating to Decimal fails if I build a compiler where the 
> default
> is IEEE 128-bit:
> 
> * c-c++-common/dfp/convert-bfp-11.c
> 
> I have patches for this test, and they have been submitted separately.

ok

> 
> I have tested this patch by doing builds, bootstraps, and make check with 3
> builds on a power9 little endian server:
> 
> *   Build one used the default long double being IBM 128-bit;
> *   Build two set the long double default to IEEE 128-bit; (and)
> *   Build three set the long double default to 64-bit.
> 
> The compilers built fine providing I recompiled gmp, mpc, and mpfr with the
> appropriate long double options.  There were a few differences in the test
> suite runs that will be addressed in later patches, but over all it works
> well.  This patch is required to be able to build a toolchain where the 
> default
> long double is IEEE 128-bit.

> Can I check this patch into the master branch 
> for
> GCC 11?

Separate the check-in question from the description paragraph.

> 
> I have also built compilers with this patch on a big endian power8 system that
> has both 32-bit and 64-bit support.  There were no regressions in running 
> these
> tests on the system.
> 
> Can I check this patch into the master branch?
> 
> libgcc/
> 2021-01-28  Michael Meissner  
> 
>   * config/rs6000/_dd_to_kf.c: New file.
>   * config/rs6000/_kf_to_dd.c: New file.
>   * config/rs6000/_kf_to_sd.c: New file.
>   * config/rs6000/_kf_to_td.c: New file.
>   * config/rs6000/_sd_to_kf.c: New file.
>   * config/rs6000/_sprintfkf.c: New file.
>   * config/rs6000/_sprintfkf.h: New file.
>   * config/rs6000/_strtokf.h: New file.
>   * config/rs6000/_strtokf.c: New file.
>   * config/rs6000/_td_to_kf.c: New file.
>   * config/rs6000/quad-float128.h: Add new declarations.
>   * config/rs6000/t-float128 (fp128_dec_funcs): New macro.
>   (fp128_decstr_funcs): New macro.
>   (ibm128_dec_funcs): New macro.
>   (fp128_ppc_funcs): Add the new conversions.
>   (fp128_dec_objs): Force Decimal <-> __float128 conversions to be
>   compiled with -mabi=ieeelongdouble.
>   (fp128_decstr_objs): Force __float128 <-> string conversions to be
>   compiled with -mabi=ibmlongdouble.
>   (ibm128_dec_objs): Force Decimal <-> __float128 conversions to be
>   compiled with -mabi=ieeelongdouble.
>   (FP128_CFLAGS_DECIMAL): New macro.
>   (IBM128_CFLAGS_DECIMAL): New macro.
>   * dfp-bit.c (DFP_TO_BFP): Add PowerPC _Float128 support.
>   (BFP_TO_DFP): Add PowerPC _Float128 support.
>   * dfp-bit.h (BFP_KIND): Add new binary floating point kind for
>   IEEE 128-bit floating point.
>   (DFP_TO_BFP): Add PowerPC _Float128 support.
>   (BFP_TO_DFP): Add PowerPC _Float128 support.
>   (BFP_SPRINTF): New macro.
> ---
>  libgcc/config/rs6000/_dd_to_kf.c | 37 ++
>  libgcc/config/rs6000/_kf_to_dd.c | 37 ++
>  libgcc/config/rs6000/_kf_to_sd.c | 37 ++
>  libgcc/config/rs6000/_kf_to_td.c | 37 ++
>  

Re: [PATCH] PowerPC: Map IEEE 128-bit long double built-ins.

2021-01-27 Thread will schmidt via Gcc-patches
On Wed, 2021-01-27 at 19:43 -0600, Segher Boessenkool wrote:
> On Wed, Jan 27, 2021 at 01:06:46PM -0600, will schmidt wrote:
> > On Thu, 2021-01-14 at 11:59 -0500, Michael Meissner via Gcc-patches 
> > wrote:
> > > November 19th, 2020:
> > > Message-ID: <20201119235814.ga...@ibm-toto.the-meissners.org>
> > 
> > Subject and date should be sufficient
> 
> Only if people pick good subjects, and do not send ten patches with a
> similar subject line on the same day.  I asked for the message id,
> that works pretty much everywhere.

Good points..  I wasn't aware you had specifically asked for the
message ids.  Thanks for clarifying the situation. :-)


> 
> > _if_ having the old versions
> > of the patchs are necessary to review the latest version of the
> > patch.  Which ideally is not the case.
> 
> Stronger that that: I need to know what changed!  So please just
> explain
> what changed, in just a short sentence or two, or more if that is
> needed
> (but not if it is not needed).
> 
> 
> Segher



Re: [PATCH, rs6000] improve vec_ctf invalid parameter handling. (pr91903)

2021-01-27 Thread will schmidt via Gcc-patches
On Wed, 2021-01-27 at 18:24 -0600, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Oct 26, 2020 at 04:22:32PM -0500, will schmidt wrote:
> >   Per PR91903, GCC ICEs when we attempt to pass a variable
> > (or out of range value) into the vec_ctf() builtin.  Per
> > investigation, the parameter checking exists for this
> > builtin with the int types, but was missing for
> > the long long types.
> > 
> > This patch adds the missing CODE_FOR_* entries to the
> > rs6000_expand_binup_builtin to cover that scenario.
> > This patch also updates some existing tests to remove
> > calls to vec_ctf() and vec_cts() that contain negative
> > values.
> > --- a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> > @@ -212,14 +212,14 @@ int main ()
> >extern vector unsigned long long u9; u9 = vec_mergeo (u3, u4);
> >  
> >extern vector long long l8; l8 = vec_mul (l3, l4);
> >extern vector unsigned long long u6; u6 = vec_mul (u3, u4);
> >  
> > -  extern vector double dh; dh = vec_ctf (la, -2);
> > +  extern vector double dh; dh = vec_ctf (la, 2);
> >extern vector double di; di = vec_ctf (ua, 2);
> >extern vector int sz; sz = vec_cts (fa, 0x1F);
> > -  extern vector long long l9; l9 = vec_cts (dh, -2);
> > +  extern vector long long l9; l9 = vec_cts (dh, 2);
> 
> I think removing the negative inputs here reduces test coverage?  Why
> did you change them, it isn't immediately clear to me?


The vec_ctf() and vec_cts() builtins accept a const int parameter which
should be in the range of 0..31.   The PR was initially
written/described as an ICE when a variable was passed into the
builtin, and part of debug/fixups revealed that the testcase negative
values were also invalid.
I'll clarify that in the commit message.


> 
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/pr91903.c
> > @@ -0,0 +1,74 @@
> > +/* { dg-do compile */
> > +/* { dg-require-effective-target p8vector_hw } */
> 
> Compile tests should use p8vector_ok, instead.  (We do not care what
> kind of hardware the system under test is: we can run this on a
> cross-
> compiler just fine, after all!)

ok

> 
> > +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
> 
> Please skip this line.  If the test does not work for Darwin Iain can
> easily disable it, but if you do, no one will find out if it does
> work.

ok, sounds good.
> 
> Okay for trunk with those things fixed, and the -2 thing looked at.
> Thanks!
> 

Thanks for the review. :-)

> 
> Segher



Re: [PATCH, rs6000] improve vec_ctf invalid parameter handling. (pr91903)

2021-01-27 Thread will schmidt via Gcc-patches


Ping!  

Thanks
-Will


On Mon, 2021-01-04 at 18:03 -0600, will schmidt via Gcc-patches wrote:
> On Mon, 2020-10-26 at 16:22 -0500, will schmidt wrote:
> > [PATCH, rs6000] improve vec_ctf invalid parameter handling.
> > 
> > Hi,
> >   Per PR91903, GCC ICEs when we attempt to pass a variable
> > (or out of range value) into the vec_ctf() builtin.  Per
> > investigation, the parameter checking exists for this
> > builtin with the int types, but was missing for
> > the long long types.
> > 
> > This patch adds the missing CODE_FOR_* entries to the
> > rs6000_expand_binup_builtin to cover that scenario.
> > This patch also updates some existing tests to remove
> > calls to vec_ctf() and vec_cts() that contain negative
> > values.
> > 
> > Regtested clean on power7, power8, power9 Linux targets.
> > 
> > OK for trunk?
> 
> 
> I've reviewed the list archives in case my local inbox lost a
> response..  I don't think this one was reviewed.  
> so..
> 
> ping!  
> 
> :-) 
> 
> thanks
> -Will
> 
> 
> > 
> > THanks,
> > -Will
> > 
> > PR target/91903
> > 
> > 2020-10-26  Will Schmidt  
> > 
> > gcc/ChangeLog:
> > * config/rs6000/rs6000-call.c (rs6000_expand_binup_builtin):
> > Add
> > clauses for CODE_FOR_vsx_xvcvuxddp_scale and
> > CODE_FOR_vsx_xvcvsxddp_scale to the parameter checking code.
> > 
> > gcc/testsuite/ChangeLog:
> > * testsuite/gcc.target/powerpc/pr91903.c: New test.
> > * testsuite/gcc.target/powerpc/builtins-1.fold.h: Update.
> > * testsuite/gcc.target/powerpc/builtins-2.c: Update.
> > 
> > diff --git a/gcc/config/rs6000/rs6000-call.c
> > b/gcc/config/rs6000/rs6000-call.c
> > index b044778a7ae4..eb7e007e68d3 100644
> > --- a/gcc/config/rs6000/rs6000-call.c
> > +++ b/gcc/config/rs6000/rs6000-call.c
> > @@ -9447,11 +9447,13 @@ rs6000_expand_binop_builtin (enum insn_code
> > icode, tree exp, rtx target)
> > }
> >  }
> >else if (icode == CODE_FOR_altivec_vcfux
> >|| icode == CODE_FOR_altivec_vcfsx
> >|| icode == CODE_FOR_altivec_vctsxs
> > -  || icode == CODE_FOR_altivec_vctuxs)
> > +  || icode == CODE_FOR_altivec_vctuxs
> > +  || icode == CODE_FOR_vsx_xvcvuxddp_scale
> > +  || icode == CODE_FOR_vsx_xvcvsxddp_scale)
> >  {
> >/* Only allow 5-bit unsigned literals.  */
> >STRIP_NOPS (arg1);
> >if (TREE_CODE (arg1) != INTEGER_CST
> >   || TREE_INT_CST_LOW (arg1) & ~0x1f)
> > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> > b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> > index 8bc5f5e43366..42d552295e3e 100644
> > --- a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> > @@ -212,14 +212,14 @@ int main ()
> >extern vector unsigned long long u9; u9 = vec_mergeo (u3, u4);
> >  
> >extern vector long long l8; l8 = vec_mul (l3, l4);
> >extern vector unsigned long long u6; u6 = vec_mul (u3, u4);
> >  
> > -  extern vector double dh; dh = vec_ctf (la, -2);
> > +  extern vector double dh; dh = vec_ctf (la, 2);
> >extern vector double di; di = vec_ctf (ua, 2);
> >extern vector int sz; sz = vec_cts (fa, 0x1F);
> > -  extern vector long long l9; l9 = vec_cts (dh, -2);
> > +  extern vector long long l9; l9 = vec_cts (dh, 2);
> >extern vector unsigned long long u7; u7 = vec_ctu (di, 2);
> >extern vector unsigned int usz; usz = vec_ctu (fa, 0x1F);
> >  
> >extern vector float f1; f1 = vec_mergee (fa, fb);
> >extern vector float f2; f2 = vec_mergeo (fa, fb);
> > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> > b/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> > index 2aa23a377992..30acae47faff 100644
> > --- a/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> > @@ -40,16 +40,16 @@ int main ()
> >  
> >if (se[0] != 27L || se[1] != 27L || sf[0] != -14L || sf[1] !=
> > -14L
> >|| ue[0] != 27L || ue[1] != 27L || uf[0] != 14L || uf[1] !=
> > 14L)
> >  abort ();
> >  
> > -  vector double da = vec_ctf (sa, -2);
> > +  vector double da = vec_ctf (sa, 2);
> >vector double db = vec_ctf (ua, 2);
> > -  vector long long sg = vec_cts (da, -2);
> > +  vector long long sg = vec_cts (da, 2);
> >vector unsigned long long ug = vec_ctu (db, 2);
> >  
> > -  if (da[0] != 108.0 || da

Re: [PATCH] PowerPC: Map IEEE 128-bit long double built-ins.

2021-01-27 Thread will schmidt via Gcc-patches
On Thu, 2021-01-14 at 11:59 -0500, Michael Meissner via Gcc-patches wrote:
> From 78435dee177447080434cdc08fc76b1029c7f576 Mon Sep 17 00:00:00 2001
> From: Michael Meissner 
> Date: Wed, 13 Jan 2021 21:47:03 -0500
> Subject: [PATCH] PowerPC: Map IEEE 128-bit long double built-ins.
> 
> This patch replaces patches previously submitted:
> 
> September 24th, 2020:
> Message-ID: <20200924203159.ga31...@ibm-toto.the-meissners.org>
> 
> October 9th, 2020:
> Message-ID: <20201009043543.ga11...@ibm-toto.the-meissners.org>
> 
> October 24th, 2020:
> Message-ID: <2020100346.ga8...@ibm-toto.the-meissners.org>
> 
> November 19th, 2020:
> Message-ID: <20201119235814.ga...@ibm-toto.the-meissners.org>


Subject and date should be sufficient _if_ having the old versions
of the patches is necessary to review the latest version of the
patch.  Which ideally is not the case.


> 
> This patch maps the built-in functions that take or return long double
> arguments on systems where long double is IEEE 128-bit.
> 
> If long double is IEEE 128-bit, this patch goes through the built-in functions
> and changes the name of the math, scanf, and printf built-in functions to use
> the functions that GLIBC provides when long double uses the IEEE 128-bit
> representation.

ok.

> 
> In addition, changing the name in GCC allows the Fortran compiler to
> automatically use the correct name.

Does the fortran compiler currently use the wrong name? (pr?)

> 
> To map the math functions, typically this patch changes l to
> __ieee128.  However there are some exceptions that are handled with this
> patch.

This appears to be the rs6000_mangle_decl_assembler_name() function, which
also maps l_r to ieee128_r, and looks like some additional special
handling for printf and scanf.


> To map the printf functions,  is mapped to __ieee128.
> 
> To map the scanf functions,  is mapped to __isoc99_ieee128.


> 
> I have tested this patch by doing builds, bootstraps, and make check with 3
> builds on a power9 little endian server:
> 
> * Build one used the default long double being IBM 128-bit;
> * Build two set the long double default to IEEE 128-bit; (and)
> * Build three set the long double default to 64-bit.
> 

ok

> The compilers built fine providing I recompiled gmp, mpc, and mpfr with the
> appropriate long double options.

Presumably the build is otherwise broken...
Does that mean more than invoking download_prerequisites as part of the
build?   If there are specific options required during configure/build of
those packages, they should be called out.

> There were a few differences in the test
> suite runs that will be addressed in later patches, but over all it works
> well.

Presumably minimal. :-)


>   This patch is required to be able to build a toolchain where the default
> long double is IEEE 128-bit. 

Ok.   Could lead the patch description with this.  I imagine this is
just one of several patches that are still required towards that goal.



>  Can I check this patch into the master branch for
> GCC 11?





> 
> gcc/
> 2021-01-14  Michael Meissner  
> 
>   * config/rs6000/rs6000.c (ieee128_builtin_name): New function.
>   (built_in_uses_long_double): New function.
>   (identifier_ends_in_suffix): New function.
>   (rs6000_mangle_decl_assembler_name): Update support for mapping built-in
>   function names for long double built-in functions if long double is
>   IEEE 128-bit to catch all of the built-in functions that take or
>   return long double arguments.
> 
> gcc/testsuite/
> 2021-01-14  Michael Meissner  
> 
>   * gcc.target/powerpc/float128-longdouble-math.c: New test.
>   * gcc.target/powerpc/float128-longdouble-stdio.c: New test.
>   * gcc.target/powerpc/float128-math.c: Adjust test for new name
>   being generated.  Add support for running test on power10.  Add
>   support for running if long double defaults to 64-bits.
> ---
>  gcc/config/rs6000/rs6000.c| 239 --
>  .../powerpc/float128-longdouble-math.c| 442 ++
>  .../powerpc/float128-longdouble-stdio.c   |  36 ++
>  .../gcc.target/powerpc/float128-math.c|  16 +-
>  4 files changed, 694 insertions(+), 39 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/float128-longdouble-math.c
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/float128-longdouble-stdio.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 6f48dd6566d..282703b9715 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -27100,6 +27100,172 @@ rs6000_globalize_decl_name (FILE * stream, tree 
> decl)
>  #endif
> 
>  
> +/* If long double uses the IEEE 128-bit representation, return the name used
> +   within GLIBC for the IEEE 128-bit long double built-in, instead of the
> +   default IBM 128-bit long double built-in.  Or return NULL if the built-in
> +   function does not use long double.  

Re: [PATCH, rs6000] Deprecate unnecessary __builtin_dfp_dtstsfi_*_dd and td overloads

2021-01-26 Thread Bill Schmidt via Gcc-patches

On 1/25/21 10:23 AM, Segher Boessenkool wrote:

Hi!

On Thu, Jan 21, 2021 at 05:49:14PM -0600, will schmidt wrote:

   Noted as part of the work-in-progress builtins rewrite, the
__builtin_dfp_dtstsfi_*_{dd,td} builtins are redundant, and are thusly
being marked as deprecated.  They will be removed as part of the builtins
rewrite sometime in the future.
gcc/ChangeLog:
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Mark builtins P9_BUILTIN_DFP_TSTSFI_LT_DD, P9_BUILTIN_DFP_TSTSFI_EQ_DD
P9_BUILTIN_DFP_TSTSFI_GT_DD, P9_BUILTIN_DFP_TSTSFI_OV_DD,
P9_BUILTIN_DFP_TSTSFI_LT_TD, P9_BUILTIN_DFP_TSTSFI_EQ_TD,
P9_BUILTIN_DFP_TSTSFI_GT_TD, P9_BUILTIN_DFP_TSTSFI_OV_TD as deprecated.
* doc/extend.texi: Update examples to indicate deprecated functions.

testsuite/ChangeLog:
* gcc.target/powerpc/dfp/dtstsfi-10.c: Mark 
__builtin_dfp_dtstsfi_*_{dd,td}
calls as deprecated.
* gcc.target/powerpc/dfp/dtstsfi-11.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-12.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-13.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-14.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-15.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-16.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-17.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-18.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-19.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-30.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-31.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-32.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-33.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-34.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-35.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-36.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-37.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-38.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-39.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-50.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-51.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-52.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-53.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-54.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-55.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-56.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-57.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-58.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-59.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-70.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-71.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-72.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-73.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-74.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-75.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-76.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-77.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-78.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-79.c: Same.
* gcc.target/powerpc/pr92661.c: Same.

This is okay for trunk if Bill thinks it is the right direction.  Thanks!


I'm afraid I thought these were only introduced in GCC 11, but it turns 
out they are much older.  So we can't remove them without a deprecation 
cycle, unfortunately.  Please hold off until GCC 12. I apologize for 
misleading you.


Bill




Segher


Re: [PATCH] rs6000: Fix vec insert ilp32 ICE and test failures [PR98799]

2021-01-26 Thread will schmidt via Gcc-patches
On Tue, 2021-01-26 at 01:46 -0600, Xionghu Luo via Gcc-patches wrote:
> From: "luo...@cn.ibm.com" 
> 
> UNSPEC_SI_FROM_SF is not supported when TARGET_DIRECT_MOVE_64BIT
> is false for -m32, don't generate VIEW_CONVERT_EXPR(ARRAY_REF) for
> variable vector insert.  Remove rs6000_expand_vector_set_var helper
> function, adjust the p8 and p9 definitions position and make them
> static.
> 
> The previous commit r11-6858 missed check m32, This patch is tested pass
> on P7BE{m32,m64}/P8BE{m32,m64}/P8LE/P9LE with
> RUNTESTFLAGS="--target_board =unix'{-m32,-m64}" for BE targets.
> 
> gcc/ChangeLog:
> 
>   2021-01-26  Xionghu Luo  
>   David Edelsohn 
> 
>   PR target/98799
>   * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>   Don't generate VIEW_CONVERT_EXPR for m32.

This is hinted at in the description, but would be good to be clear in
the changelog too. 

Consider something like "Don't generate VIEW_CONVERT_EXPR for fcode
ALTIVEC_BUILTIN_VEC_INSERT when -m32."


>   * config/rs6000/rs6000-protos.h (rs6000_expand_vector_set_var):
>   Delete.


>   * config/rs6000/rs6000.c (rs6000_expand_vector_set): Remove the
>   wrapper call rs6000_expand_vector_set_var. Call
>   rs6000_expand_vector_set_var_p9 and rs6000_expand_vector_set_var_p8
>   directly.
>   (rs6000_expand_vector_set_var): Delete.

The diff conflates the deleted function with the changes to an existing
function, making it harder to sort out...   

Was/is deleting the rs6000_expand_vector_set_var() helper necessary for
this fix, or just cleanup?   


Add:
(rs6000_expand_vector_set_var_p9): Make static.
(rs6000_expand_vector_set_var_p8): Make static.





> 
> gcc/testsuite/ChangeLog:
> 
>   2021-01-26  Xionghu Luo  
> 
>   PR target/98827
>   * gcc.target/powerpc/fold-vec-insert-char-p8.c: Adjust ilp32.
>   * gcc.target/powerpc/fold-vec-insert-char-p9.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-double.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-float-p8.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-float-p9.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-int-p8.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-int-p9.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-longlong.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-short-p8.c: Likewise.
>   * gcc.target/powerpc/fold-vec-insert-short-p9.c: Likewise.
>   * gcc.target/powerpc/pr79251.p8.c: Likewise.
>   * gcc.target/powerpc/pr79251.p9.c: Likewise.
>   * gcc.target/powerpc/vsx-builtin-7.c: Likewise.

Just a glance, those changes look OK.


> ---
>  gcc/config/rs6000/rs6000-c.c  |   2 +-
>  gcc/config/rs6000/rs6000-protos.h |   1 -
>  gcc/config/rs6000/rs6000.c| 236 +-
>  .../powerpc/fold-vec-insert-char-p8.c |  14 +-
>  .../powerpc/fold-vec-insert-char-p9.c |   6 +-
>  .../powerpc/fold-vec-insert-double.c  |  10 +-
>  .../powerpc/fold-vec-insert-float-p8.c|  12 +-
>  .../powerpc/fold-vec-insert-float-p9.c|   6 +-
>  .../powerpc/fold-vec-insert-int-p8.c  |  13 +-
>  .../powerpc/fold-vec-insert-int-p9.c  |   9 +-
>  .../powerpc/fold-vec-insert-longlong.c|   8 +-
>  .../powerpc/fold-vec-insert-short-p8.c|  10 +-
>  .../powerpc/fold-vec-insert-short-p9.c|  13 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.p8.c |  17 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.p9.c |  16 +-
>  .../gcc.target/powerpc/vsx-builtin-7.c|   2 +-
>  16 files changed, 203 insertions(+), 172 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index f6ee1e61b56..656cdb39f3f 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -1600,7 +1600,7 @@ altivec_resolve_overloaded_builtin (location_t loc, 
> tree fndecl,
> stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
>   }
> 
> -  if (TARGET_P8_VECTOR)
> +  if (TARGET_P8_VECTOR && TARGET_DIRECT_MOVE_64BIT)
>   {
> stmt = build_array_ref (loc, stmt, arg2);
> stmt = fold_build2 (MODIFY_EXPR, TREE_TYPE (arg0), stmt,
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 9a46a414743..9cca7325d0d 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -58,7 +58,6 @@ extern bool rs6000_split_128bit_ok_p (rtx []);
>  extern void rs6000_expand_float128_convert (rtx, rtx, bool);
>  extern void rs6000_expand_vector_init (rtx, rtx);
>  extern void rs6000_expand_vector_set (rtx, rtx, rtx);
> -extern void rs6000_expand_vector_set_var (rtx, rtx, rtx);
>  extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
>  extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
>  extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
> 

[PATCH, rs6000] Deprecate unnecessary __builtin_dfp_dtstsfi_*_dd and td overloads

2021-01-21 Thread will schmidt via Gcc-patches
[PATCH, rs6000] Deprecate unnecessary __builtin_dfp_dtstsfi_*_dd and td 
overloads

Hi,
  Noted as part of the work-in-progress builtins rewrite, the
__builtin_dfp_dtstsfi_*_{dd,td} builtins are redundant, and are thusly
being marked as deprecated.  They will be removed as part of the builtins
rewrite sometime in the future.
This includes the builtins __builtin_dfp_dtstsfi_eq_dd,
__builtin_dfp_dtstsfi_gt_dd, __builtin_dfp_dtstsfi_lt_dd,
__builtin_dfp_dtstsfi_ov_dd, __builtin_dfp_dtstsfi_eq_td,
__builtin_dfp_dtstsfi_gt_td, __builtin_dfp_dtstsfi_lt_td,
and __builtin_dfp_dtstsfi_ov_td.

Regtests underway.

OK for trunk?

Thanks
-Will

--

gcc/ChangeLog:
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Mark builtins P9_BUILTIN_DFP_TSTSFI_LT_DD, P9_BUILTIN_DFP_TSTSFI_EQ_DD,
P9_BUILTIN_DFP_TSTSFI_GT_DD, P9_BUILTIN_DFP_TSTSFI_OV_DD,
P9_BUILTIN_DFP_TSTSFI_LT_TD, P9_BUILTIN_DFP_TSTSFI_EQ_TD,
P9_BUILTIN_DFP_TSTSFI_GT_TD, P9_BUILTIN_DFP_TSTSFI_OV_TD as deprecated.
* doc/extend.texi: Update examples to indicate deprecated functions.

testsuite/ChangeLog:
* gcc.target/powerpc/dfp/dtstsfi-10.c: Mark 
__builtin_dfp_dtstsfi_*_{dd,td}
calls as deprecated.
* gcc.target/powerpc/dfp/dtstsfi-11.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-12.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-13.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-14.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-15.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-16.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-17.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-18.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-19.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-30.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-31.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-32.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-33.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-34.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-35.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-36.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-37.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-38.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-39.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-50.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-51.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-52.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-53.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-54.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-55.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-56.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-57.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-58.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-59.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-70.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-71.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-72.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-73.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-74.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-75.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-76.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-77.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-78.c: Same.
* gcc.target/powerpc/dfp/dtstsfi-79.c: Same.
* gcc.target/powerpc/pr92661.c: Same.

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index cdc64bd63c66..9a79e5684f20 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -946,10 +946,21 @@ altivec_resolve_overloaded_builtin (location_t loc, tree 
fndecl,
   else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !BYTES_BIG_ENDIAN)
 warning (OPT_Wdeprecated,
 "% is deprecated for little endian; use "
 "assignment for unaligned loads and stores");
 
+  if (fcode == P9_BUILTIN_DFP_TSTSFI_LT_DD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_EQ_DD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_GT_DD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_OV_DD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_LT_TD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_EQ_TD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_GT_TD
+   || fcode == P9_BUILTIN_DFP_TSTSFI_OV_TD)
+  warning (OPT_Wdeprecated, "builtin '%s' is deprecated",
+  IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
   if (fcode == ALTIVEC_BUILTIN_VEC_MUL)
 {
   /* vec_mul needs to be special cased because there are no instructions
 for it for the {un}signed char, {un}signed short, and {un}signed int
 types.  */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 8daa1c679748..90db01daeac6 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -17859,31 +17859,33 @@ int __builtin_byte_in_set (unsigned char u, unsigned 
long long set);
 int __builtin_byte_in_range (unsigned char u, unsigned int range);
 int __builtin_byte_in_either_range (unsigned char u, unsigned int ranges);
 
 int __builtin_dfp_dtstsfi_lt (unsigned int 

[PATCH, rs6000] Update pr88233.c test (pr91799)

2021-01-14 Thread will schmidt via Gcc-patches
[PATCH, rs6000] Update pr88233.c test (pr91799)

Hi,

This is a follow-up fix to clean up pr91799.  Per review of test results,
it appears that the combination of target and dg-require stanzas is
not sufficient to properly limit the test to 64-bit only on darwin.

This adds additional clauses to limit the test to 64-bit environments, and
to configs that support at least power8 codegen.

Tested on power7 and power8 using assorted variations of
  make -k check-gcc-c "RUNTESTFLAGS=powerpc.exp=pr88233.c
  --target_board=unix/'{-mcpu=power7,-mcpu=power6,-mcpu=power8}''{-m32,-m64}'"

OK for trunk?

PR target/91799

2021-01-14  Will Schmidt 

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/pr88233.c: Update dg-require stanzas.

diff --git a/gcc/testsuite/gcc.target/powerpc/pr88233.c 
b/gcc/testsuite/gcc.target/powerpc/pr88233.c
index 8e5f15b83b50..acea7a586867 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr88233.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr88233.c
@@ -1,6 +1,7 @@
-/* { dg-do compile { target lp64 } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target has_arch_pwr8 } */
 /* { dg-options "-O2 -mdejagnu-cpu=power8" } */
 
 typedef struct { double a[2]; } A;
 A
 foo (const A *a)



Re: [PATCH v4] rs6000, vector integer multiply/divide/modulo instructions

2021-01-11 Thread will schmidt via Gcc-patches
On Mon, 2020-12-07 at 16:31 -0800, Carl Love wrote:
> Will:
> 
> I have addressed you comments with regards to the Change Log entries.  
> 
> The extra define vec_div was removed.
> 
> Added the missing entries for DIVU_V2DI  DIVS_V2DI in rs6000-call.c.
> 
> The extra MULLD_V2DI case statement entry was removed.
> 
> Added comment in rs6000.md about size for vector types per discussion
> with Pat.
> 
>   Carl
> 
> 
> GCC maintainers:
> 
> The following patch adds new builtins for the vector integer multiply,
> divide and modulo operations.  The builtins are: vec_mulh(),
> vec_dive(), vec_mod() for signed and unsigned integers and long
> longintegers. The existing support for the vec_div()and vec_mul()
> builtins emulate the vector operations with multiple scalar
> instructions.  This patch adds support for these builtins using the new
> vector instructions for Power 10.

Missing a couple spaces. 
"long integers"
 and 
"vec_div() and".


> 
> The patch was compiled and tested on:
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
>   powerpc64le-unknown-linux-gnu (Power 10 LE)
> 
> with no regressions. Additionally the new test case was compiled and
> executed by hand on Mambo to verify the test case passes.

May also be worth trying on Power8/BE, just for the variety.

> 
> Please let me know if this patch is acceptable for mainline.  Thanks.
> 
> Carl Love
> 
> -
> 
> From 15f9c090106c62af83cc405414466ad03d1a4c55 Mon Sep 17 00:00:00 2001
> From: Carl Love 
> Date: Fri, 4 Sep 2020 19:24:22 -0500
> Subject: [PATCH] rs6000, vector integer multiply/divide/modulo instructions
> 
> 2020-12-07  Carl Love  
> 
> gcc/
>   * config/rs6000/altivec.h (vec_mulh, vec_dive, vec_mod): Newdefines.

Embedded tab there.

>   * config/rs6000/altivec.md (VIlong): Move define to file vsx.md.

>   * config/rs6000/rs6000-builtin.def (DIVES_V4SI, DIVES_V2DI,
>   DIVEU_V4SI, DIVEU_V2DI, DIVS_V4SI, DIVS_V2DI, DIVU_V4SI,
>   DIVU_V2DI, MODS_V2DI, MODS_V4SI, MODU_V2DI, MODU_V4SI,
>   MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI, MULLD_V2DI):
>   Add builtin define.
>   (MULH, DIVE, MOD):  Add new BU_P10_OVERLOAD_2 definitions.



>   * config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add
>   VSX_BUILTIN_VEC_DIV, P10_BUILTIN_VEC_VDIVE,
>   P10_BUILTIN_VEC_VDIVE, P10_BUILTIN_VEC_VMOD, P10_BUILTIN_VEC_VMULH
>   overloaded definitions.

P10_BUILTIN_VEC_VDIVE is mentioned here twice.
I don't see it in the patch body at all.
I don't see P10_BUILTIN_VEC_VMOD either.
Also don't see P10_BUILTIN_VEC_VMULH.


>   (builtin_function_type) [P10V_BUILTIN_DIVEU_V4SI,
>   P10V_BUILTIN_DIVEU_V2DI, P10V_BUILTIN_DIVU_V4SI,
>   P10V_BUILTIN_DIVU_V2DI, P10V_BUILTIN_MODU_V2DI,
>   P10V_BUILTIN_MODU_V4SI, P10V_BUILTIN_MULHU_V2DI,
>   P10V_BUILTIN_MULHU_V4SI, P10V_BUILTIN_MULLD_V2DI]: Add case
>   statements for builtins.

I don't see the P10V_BUILTIN_MULLD_V2DI case statement entry in the
patch below.   A previous review commented that there might have been a
missing altivec_overloaded_builtins entry for the MULLD_V2DI entry. 
Codegen for unsigned mull against v2di was correct?


>   * config/rs6000/rs6000.md (bits): Add new attribute sizes.

I'd be more verbose here to provide something searchable.
i.e. "Add V4SI,V2DI entries..."

>   * config/rs6000/vsx.md (VIlong): New define_mode_iterator.

Not new.  'Moved here from altivec.md' or something similar.


>   (UNSPEC_VDIVES, UNSPEC_VDIVEU): New unspec definitions.
>   (vsx_mul_v2di): Add if TARGET_POWER10 statement.
>   (vsx_udiv_v2di): Add if TARGET_POWER10 statement.
>   (dives_, diveu_, div3, uvdiv3,
>   mods_, modu_, mulhs_, mulhu_, mulv2di3):
>   Add define_insn, mode is VIlong.
>   doc/extend.texi (vec_mulh, vec_mul, vec_div, vec_dive, vec_mod): Add
>   builtin descriptions.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/builtins-1-p10-runnable.c: New test file.
> ---
>  gcc/config/rs6000/altivec.h   |   4 +
>  gcc/config/rs6000/altivec.md  |   2 -
>  gcc/config/rs6000/rs6000-builtin.def  |  22 +
>  gcc/config/rs6000/rs6000-call.c   |  53 +++
>  gcc/config/rs6000/rs6000.md   |   4 +-
>  gcc/config/rs6000/vsx.md  | 212 +++---
>  gcc/doc/extend.texi   | 120 ++
>  .../powerpc/builtins-1-p10-runnable.c | 398 ++
>  8 files changed, 762 insertions(+), 53 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-1-p10-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index e1884f51bd8..b678e5cf28d 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -750,6 +750,10 @@ __altivec_scalar_pred(vec_any_nle,
>  

Re: [PATCH, rs6000] improve vec_ctf invalid parameter handling. (pr91903)

2021-01-04 Thread will schmidt via Gcc-patches
On Mon, 2020-10-26 at 16:22 -0500, will schmidt wrote:
> [PATCH, rs6000] improve vec_ctf invalid parameter handling.
> 
> Hi,
>   Per PR91903, GCC ICEs when we attempt to pass a variable
> (or out of range value) into the vec_ctf() builtin.  Per
> investigation, the parameter checking exists for this
> builtin with the int types, but was missing for
> the long long types.
> 
> This patch adds the missing CODE_FOR_* entries to the
> rs6000_expand_binop_builtin to cover that scenario.
> This patch also updates some existing tests to remove
> calls to vec_ctf() and vec_cts() that contain negative
> values.
> 
> Regtested clean on power7, power8, power9 Linux targets.
> 
> OK for trunk?


I've reviewed the list archives in case my local inbox lost a response..  I 
don't think this one was reviewed.  
so..

ping!  

:-) 

thanks
-Will


> 
> Thanks,
> -Will
> 
> PR target/91903
> 
> 2020-10-26  Will Schmidt  
> 
> gcc/ChangeLog:
>   * config/rs6000/rs6000-call.c (rs6000_expand_binop_builtin): Add
>   clauses for CODE_FOR_vsx_xvcvuxddp_scale and
>   CODE_FOR_vsx_xvcvsxddp_scale to the parameter checking code.
> 
> gcc/testsuite/ChangeLog:
>   * testsuite/gcc.target/powerpc/pr91903.c: New test.
>   * testsuite/gcc.target/powerpc/builtins-1.fold.h: Update.
>   * testsuite/gcc.target/powerpc/builtins-2.c: Update.
> 
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index b044778a7ae4..eb7e007e68d3 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9447,11 +9447,13 @@ rs6000_expand_binop_builtin (enum insn_code icode, 
> tree exp, rtx target)
>   }
>  }
>else if (icode == CODE_FOR_altivec_vcfux
>|| icode == CODE_FOR_altivec_vcfsx
>|| icode == CODE_FOR_altivec_vctsxs
> -  || icode == CODE_FOR_altivec_vctuxs)
> +  || icode == CODE_FOR_altivec_vctuxs
> +  || icode == CODE_FOR_vsx_xvcvuxddp_scale
> +  || icode == CODE_FOR_vsx_xvcvsxddp_scale)
>  {
>/* Only allow 5-bit unsigned literals.  */
>STRIP_NOPS (arg1);
>if (TREE_CODE (arg1) != INTEGER_CST
> || TREE_INT_CST_LOW (arg1) & ~0x1f)
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h 
> b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> index 8bc5f5e43366..42d552295e3e 100644
> --- a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
> @@ -212,14 +212,14 @@ int main ()
>extern vector unsigned long long u9; u9 = vec_mergeo (u3, u4);
>  
>extern vector long long l8; l8 = vec_mul (l3, l4);
>extern vector unsigned long long u6; u6 = vec_mul (u3, u4);
>  
> -  extern vector double dh; dh = vec_ctf (la, -2);
> +  extern vector double dh; dh = vec_ctf (la, 2);
>extern vector double di; di = vec_ctf (ua, 2);
>extern vector int sz; sz = vec_cts (fa, 0x1F);
> -  extern vector long long l9; l9 = vec_cts (dh, -2);
> +  extern vector long long l9; l9 = vec_cts (dh, 2);
>extern vector unsigned long long u7; u7 = vec_ctu (di, 2);
>extern vector unsigned int usz; usz = vec_ctu (fa, 0x1F);
>  
>extern vector float f1; f1 = vec_mergee (fa, fb);
>extern vector float f2; f2 = vec_mergeo (fa, fb);
> diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-2.c 
> b/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> index 2aa23a377992..30acae47faff 100644
> --- a/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/builtins-2.c
> @@ -40,16 +40,16 @@ int main ()
>  
>if (se[0] != 27L || se[1] != 27L || sf[0] != -14L || sf[1] != -14L
>|| ue[0] != 27L || ue[1] != 27L || uf[0] != 14L || uf[1] != 14L)
>  abort ();
>  
> -  vector double da = vec_ctf (sa, -2);
> +  vector double da = vec_ctf (sa, 2);
>vector double db = vec_ctf (ua, 2);
> -  vector long long sg = vec_cts (da, -2);
> +  vector long long sg = vec_cts (da, 2);
>vector unsigned long long ug = vec_ctu (db, 2);
>  
> -  if (da[0] != 108.0 || da[1] != -56.0 || db[0] != 6.75 || db[1] != 3.5
> +  if (da[0] != 6.75 || da[1] != -3.5 || db[0] != 6.75 || db[1] != 3.5
>|| sg[0] != 27L || sg[1] != -14L || ug[0] != 27L || ug[1] != 14L)
>  abort ();
>  
>vector float fa = vec_ctf (inta, 5);
>if (fa[0] != 0.843750 || fa[1] != -0.031250 || fa[2] != 0.125000 || fa[3] 
> != 0.281250)
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr91903.c 
> b/gcc/testsuite/gcc.target/powerpc/pr91903.c
> new file mode 100644
> index ..f0792117a88f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr91903.c
> @@ -0,0 +1,74 @@
> +/* { dg-do compile */
> +/* { dg-require-effective-target p8vector_hw } */
> +/* { dg-skip-if "" { powerpc*-*-darwin* } } */
> +/* { dg-options "-mdejagnu-cpu=power8" } */
> +
> +#include 
> +#include 
> +#include 
> +
> +vector double retd;
> +vector float retf;
> +vector signed int retsi;
> +
> +void test_int(vector signed int a, const int b)
> +{
> + retf = 

Re: [PATCH, rs6000] Update "size" attribute for Power10

2020-12-09 Thread will schmidt via Gcc-patches
On Tue, 2020-12-08 at 15:46 -0600, Pat Haugen via Gcc-patches wrote:
> Update size attribute for Power10.
> 
> 
> This patch was broken out from my larger patch to update various
> attributes for
> Power10, in order to make the review process hopefully easier. This
> patch only
> updates the size attribute for various new instructions. There were
> no changes
> requested to this portion of the original patch, so nothing is new
> here.
> 
> Bootstrap/regtest on powerpc64le (Power8/Power10) with no new
> regressions. Ok for trunk?
> 
> -Pat
> 
> 
> 2020-11-08  Pat Haugen  
> 
> gcc/


I think you'll need to specify gcc/ChangeLog at commit time.
Beyond that nit, the Changelog content here looks to match the patch
body OK.
lgtm,
thanks
-Will

>   * config/rs6000/dfp.md (extendddtd2, trunctddd2,
> *cmp_internal1,
>   floatditd2, ftrunc2, fixdi2, dfp_ddedpd_,
>   dfp_denbcd_, dfp_dxex_, dfp_diex_,
>   *dfp_sgnfcnc_, dfp_dscli_, dfp_dscri_):
> Update size
>   attribute for Power10.
>   * config/rs6000/mma.md (*movoo): Likewise.
>   * config/rs6000/rs6000.md (define_attr "size"): Add 256.
>   (define_mode_attr bits): Add DD/TD modes.
>   * config/rs6000/sync.md (load_quadpti, store_quadpti,
> load_lockedpti,
>   store_conditionalpti): Update size attribute for Power10.
> 



Re: [PATCH, powerpc] testsuite update tests for powerpc power10 target codegen.

2020-12-08 Thread will schmidt via Gcc-patches
On Tue, 2020-12-08 at 20:20 +1030, Alan Modra wrote:
> On Mon, Dec 07, 2020 at 05:49:05PM -0600, will schmidt via Gcc-
> patches wrote:
> > [PATCH, powerpc] testsuite update tests for powerpc power10 target
> > codegen.
> 
> Appears to duplicate work I did earlier,
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/557587.html
> 
> Except I omitted fold-vec-store-builtin_vec_xst-longlong.c, due to
> -mdejagnu-cpu=power8 in that test meaning we don't see any power10
> insns.

Ah shoot, I hate to duplicate work..  and I prob even looked over your
patches (a week + or so ago?)

Your previously submitted patch should 'win'.  I'll take a peek back
and make sure I've at least posted a lgtm for your submission. :-)

Thanks
-Will




[PATCH, powerpc] testsuite update tests for powerpc power10 target codegen.

2020-12-07 Thread will schmidt via Gcc-patches
[PATCH, powerpc] testsuite update tests for powerpc power10 target codegen.

Hi,

Assorted fix-ups to include prefixed load and store instructions in the
scan-assembler stanzas for the gcc.target/powerpc tests.
For these tests, we simply need to add pstxv or plxv to the chain
of expected instructions for the load or store codegen tests to cover the
power10 target codegen.

Sniff-testing completed ok against a gcc built to target power10.  Undergoing
regtest to ensure I didn't trip up anything on p7,p8,p9 (older) targets.

OK for trunk?

Thanks,
-Will


testsuite/ChangeLog:
* fold-vec-load-builtin_vec_xl-char.c: Update scan-assembler-times
stanza to reflect power10 target codegen.
* fold-vec-load-builtin_vec_xl-double.c: Ditto.
* fold-vec-load-builtin_vec_xl-float.c: Ditto.
* fold-vec-load-builtin_vec_xl-int.c: Ditto.
* fold-vec-load-builtin_vec_xl-longlong.c: Ditto.
* fold-vec-load-builtin_vec_xl-short.c: Ditto.
* fold-vec-load-vec_vsx_ld-char.c: Ditto.
* fold-vec-load-vec_vsx_ld-double.c: Ditto.
* fold-vec-load-vec_vsx_ld-float.c: Ditto.
* fold-vec-load-vec_vsx_ld-int.c: Ditto.
* fold-vec-load-vec_vsx_ld-longlong.c: Ditto.
* fold-vec-load-vec_vsx_ld-short.c: Ditto.
* fold-vec-load-vec_xl-char.c: Ditto.
* fold-vec-load-vec_xl-double.c: Ditto.
* fold-vec-load-vec_xl-float.c: Ditto.
* fold-vec-load-vec_xl-int.c: Ditto.
* fold-vec-load-vec_xl-longlong.c: Ditto.
* fold-vec-load-vec_xl-short.c: Ditto.
* fold-vec-splat-floatdouble.c: Ditto.
* fold-vec-splat-longlong.c: Ditto.
* fold-vec-store-builtin_vec_xst-char.c: Ditto.
* fold-vec-store-builtin_vec_xst-double.c: Ditto.
* fold-vec-store-builtin_vec_xst-float.c: Ditto.
* fold-vec-store-builtin_vec_xst-int.c: Ditto.
* fold-vec-store-builtin_vec_xst-longlong.c: Ditto.
* fold-vec-store-builtin_vec_xst-short.c: Ditto.
* fold-vec-store-vec_vsx_st-char.c: Ditto.
* fold-vec-store-vec_vsx_st-double.c: Ditto.
* fold-vec-store-vec_vsx_st-float.c: Ditto.
* fold-vec-store-vec_vsx_st-int.c: Ditto.
* fold-vec-store-vec_vsx_st-longlong.c: Ditto.
* fold-vec-store-vec_vsx_st-short.c: Ditto.
* fold-vec-store-vec_xst-char.c: Ditto.
* fold-vec-store-vec_xst-double.c: Ditto.
* fold-vec-store-vec_xst-float.c: Ditto.
* fold-vec-store-vec_xst-int.c: Ditto.
* fold-vec-store-vec_xst-longlong.c: Ditto.
* fold-vec-store-vec_xst-short.c: Ditto.


diff --git 
a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c
index 9b199c219bf6..104710700c89 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-char.c
@@ -34,6 +34,6 @@ BUILD_CST_TEST( test9, vector signed char,   6, vector signed 
char);
 
 BUILD_VAR_TEST( test10, vector unsigned char, signed long long, vector 
unsigned char);
 BUILD_VAR_TEST( test11, vector unsigned char, signed int, vector unsigned 
char);
 BUILD_CST_TEST( test12, vector unsigned char, 8, vector unsigned char);
 
-/* { dg-final { scan-assembler-times {\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M} 
12 } } */
+/* { dg-final { scan-assembler-times 
{\mlxvw4x\M|\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 12 } } */
diff --git 
a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c
index c49dfe8d95b8..bfb3cfbc081e 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-double.c
@@ -26,6 +26,6 @@ BUILD_CST_TEST( test3, vector double, 12, double);
 
 BUILD_VAR_TEST( test4, vector double, signed long long, vector double);
 BUILD_VAR_TEST( test5, vector double, signed int, vector double);
 BUILD_CST_TEST( test6, vector double, 12, vector double);
 
-/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mlxvd2x\M|\mlxvx\M|\mlvx\M|\mplxv\M} 6 
} } */
diff --git 
a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c
index cdded361b128..373bead2e605 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-load-builtin_vec_xl-float.c
@@ -26,6 +26,6 @@ BUILD_CST_TEST( test3, vector float, 12, float);
 
 BUILD_VAR_TEST( test4, vector float, signed long long, vector float);
 BUILD_VAR_TEST( test5, vector float, signed 

Re: [PATCH,rs6000] Combine patterns for p10 load-cmpi fusion

2020-12-07 Thread will schmidt via Gcc-patches
On Fri, 2020-12-04 at 13:19 -0600, acsawdey--- via Gcc-patches wrote:
> From: Aaron Sawdey 
> 

Assorted comments sprinkled around below.
thanks
-Will


> This patch adds the first batch of patterns to support p10 fusion. These
> will allow combine to create a single insn for a pair of instructions
> that that power10 can fuse and execute. These particular ones have the

Just one that, or maybe 'that the'.
s/ones/fusion pairs/ ?

> requirement that only cr0 can be used when fusing a load with a compare
> immediate of -1/0/1 (if signed) or 0/1 (if unsigned), so we want combine
> to put that requirement in, and if it doesn't work out later the splitter
> can get used.

... splitter can get used, or ... splitter will 

> 
> The patterns are generated by a script genfusion.pl and live in new file
> fusion.md. This script will be expanded to generate more patterns for
> fusion.

ok

> 
> This also adds option -mpower10-fusion which defaults on for power10 and
> will gate all these fusion patterns. In addition I have added an
> undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
> that just controls the load+compare-immediate patterns. I have make

made

> these default on for power10 but they are not disallowed for earlier
> processors because it is still valid code. This allows us to test the
> correctness of fusion code generation by turning it on explicitly.
> 
> If bootstrap/regtest is clean, ok for trunk?
> 
> Thanks!
> 
>Aaron
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/genfusion.pl: New file, script to generate
>   define_insn_and_split patterns so combine can arrange fused
>   instructions next to each other.

New script to generate ...

>   * config/rs6000/fusion.md: New file, generated fused instruction
>   patterns for combine.

>   * config/rs6000/predicates.md (const_m1_to_1_operand): New predicate.
>   (non_update_memory_operand): New predicate.
ok
>   * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
>   OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER and
>   POWERPC_MASKS.
>   * config/rs6000/rs6000-protos.h (address_is_non_pfx_d_or_x): Add
>   prototype.

All usages of address_is_non_pfx_d_or_x() appear to be negated, i.e. 
+   || !address_is_non_pfx_d_or_x (XEXP (operands[1],0), 
DImode, NON_PREFIXED_DS))" 
Fully understanding that naming is
hard, I wonder if that can be adjusted to avoid the double negative,
with something like address_load_mode_requires_prefix (...)?


>   * config/rs6000/rs6000.c (rs6000_option_override_internal):
>   automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
>   if target is power10.  (rs600_opt_masks): Allow -mpower10-fusion
>   in function attributes.  (address_is_non_pfx_d_or_x): New function.

ok

>   * config/rs6000/rs6000.h: Add MASK_P10_FUSION.
>   * config/rs6000/rs6000.md: Include fusion.md.
>   * config/rs6000/rs6000.opt: Add -mpower10-fusion
>   and -mpower10-fusion-ld-cmpi.

ok

>   * config/rs6000/t-rs6000: Add dependencies involving fusion.md.

ok


> ---
>  gcc/config/rs6000/fusion.md   | 357 ++
>  gcc/config/rs6000/genfusion.pl| 144 
>  gcc/config/rs6000/predicates.md   |  14 ++
>  gcc/config/rs6000/rs6000-cpus.def |   6 +-
>  gcc/config/rs6000/rs6000-protos.h |   2 +
>  gcc/config/rs6000/rs6000.c|  51 +
>  gcc/config/rs6000/rs6000.h|   1 +
>  gcc/config/rs6000/rs6000.md   |   1 +
>  gcc/config/rs6000/rs6000.opt  |   8 +
>  gcc/config/rs6000/t-rs6000|   6 +-
>  10 files changed, 588 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/config/rs6000/fusion.md
>  create mode 100755 gcc/config/rs6000/genfusion.pl
> 
> diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md
> new file mode 100644
> index 000..a4d3a6ae7f3
> --- /dev/null
> +++ b/gcc/config/rs6000/fusion.md
> @@ -0,0 +1,357 @@
> +;; -*- buffer-read-only: t -*-
> +;; Generated automatically by genfusion.pl
> +
> +;; Copyright (C) 2020 Free Software Foundation, Inc.
> +;;
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it under
> +;; the terms of the GNU General Public License as published by the Free
> +;; Software Foundation; either version 3, or (at your option) any later
> +;; version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +;; WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +;; for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; <http://www.gnu.org/licenses/>.
> +
> +;; load-cmpi fusion pattern generated by gen_ld_cmpi_p10
> +;; load mode is DI result mode is clobber compare mode is CC extend is none
> 

Re: [PATCH v2] rs6000, vector integer multiply/divide/modulo instructions

2020-12-03 Thread will schmidt via Gcc-patches
On Tue, 2020-12-01 at 15:48 -0800, Carl Love via Gcc-patches wrote:
> Segher, Pat:
> 
> I have updated the patch to address the comments below.
> 
> On Wed, 2020-11-25 at 20:30 -0600, Segher Boessenkool wrote:
> > On Tue, Nov 24, 2020 at 08:34:51PM -0600, Pat Haugen wrote:
> > > On 11/24/20 8:17 PM, Pat Haugen via Gcc-patches wrote:
> > > > On 11/24/20 12:59 PM, Carl Love via Gcc-patches wrote:
> > > > > +(define_insn "modu_"
> > > > > +  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
> > > > > + (umod:VIlong (match_operand:VIlong 1
> > > > > "vsx_register_operand" "v")
> > > > > +  (match_operand:VIlong 2
> > > > > "vsx_register_operand" "v")))]
> > > > > +  "TARGET_POWER10"
> > > > > +  "vmodu %0,%1,%2"
> > > > > +  [(set_attr "type" "vecdiv")
> > > > > +   (set_attr "size" "128")])
> > > > 
> > > > We should only be setting "size" "128" for instructions that
> > > > operate on scalar 128-bit data items (i.e. 'vdivesq' etc).
> > > > Since
> > > > the above insns are either V2DI/V4SI (ala VIlong
> > > > mode_iterator),
> > > > they shouldn't be marked as size 128. If you want to set the
> > > > size
> > > > > based on mode, (set_attr "size" "<bits>") should do the trick I
> > > > believe.
> > > 
> > > Well, after you update "(define_mode_attr bits" in rs6000.md for
> > > V2DI/V4SI.
> > 
> > > So far, <bits> was only used for scalars.  I agree that for vectors
> > it
> > makes most sense to do the element size (because the vector size
> > always
> > is 128 bits, and for scheduling the element size can matter).  But,
> > the
> > > definitions of "size" and <bits> now say
> > 
> > ;; What data size does this instruction work on?
> > ;; This is used for insert, mul and others as necessary.
> > (define_attr "size" "8,16,32,64,128" (const_string "32"))
> > 
> > and
> > 
> > ;; How many bits in this mode?
> > (define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64")
> >(SF "32") (DF "64")])
> > so those need a bit of update as well then :-)
> 
> I set the size based on the vector element size, extendeing the
> define_mode_attr bits definition.  Please take a look at the updated
> patch.  Hopefully I have this all correct.  Thanks.
> 
> Note, I retested the updated patch on 
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
>   powerpc64le-unknown-linux-gnu (Power 10 LE)
> 
> Thanks for the help.
> 
>  Carl 
> 

Continued from yesterday..  
Thanks
-Will

> ---
> 
> rs6000, vector integer multiply/divide/modulo instructions
> 
> 2020-12-01  Carl Love  
> 
> gcc/
>   * config/rs6000/altivec.h (vec_mulh, vec_div, vec_dive,
> vec_mod): New
>   defines.
>   * config/rs6000/altivec.md (VIlong): Move define to file
> vsx.md.
>   * config/rs6000/rs6000-builtin.def (DIVES_V4SI, DIVES_V2DI,
>   DIVEU_V4SI, DIVEU_V2DI, DIVS_V4SI, DIVS_V2DI, DIVU_V4SI,
>   DIVU_V2DI, MODS_V2DI, MODS_V4SI, MODU_V2DI, MODU_V4SI,
>   MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI, MULLD_V2DI):
>   Add builtin define.
>   (MULH, DIVE, MOD):  Add new BU_P10_OVERLOAD_2 definitions.
>   * config/rs6000/rs6000-call.c (VSX_BUILTIN_VEC_DIV,
>   P10_BUILTIN_VEC_VDIVE, P10_BUILTIN_VEC_VMOD, 
> P10_BUILTIN_VEC_VMULH):
No mentions of these three P10_BUILTIN_VEC_* in patch below.


>   New overloaded definitions.
>   (builtin_function_type) [P10V_BUILTIN_DIVEU_V4SI,
>   P10V_BUILTIN_DIVEU_V2DI, P10V_BUILTIN_DIVU_V4SI,
>   P10V_BUILTIN_DIVU_V2DI, P10V_BUILTIN_MODU_V2DI,
>   P10V_BUILTIN_MODU_V4SI, P10V_BUILTIN_MULHU_V2DI,
>   P10V_BUILTIN_MULHU_V4SI, P10V_BUILTIN_MULLD_V2DI]: Add case
>   statement for builtins.
>   * config/rs6000/vsx.md (VIlong_char): Add define_mod_attribute.

just VIlong 
Maybe s/define_mod_attribute/define_mode_attr/ ?

>   (UNSPEC_VDIVES, UNSPEC_VDIVEU): Add enum for UNSPECs.



>   (vsx_mul_v2di, vsx_udiv_v2di): Add if TARGET_POWER10 statement.

I don't see vsx_mul_v2di or vsx_udiv_v2di in the patch context, but this looks
OK per a look at trunk's vsx.md.

>   (dives_, diveu_, div3, uvdiv3,
>   mods_, modu_, mulhs_, mulhu_,
> mulv2di3):
>   Add define_insn, mode is VIlong.
>   * doc/extend.texi (vec_mulh, vec_mul, vec_div, vec_dive,
> vec_mod): Add
>   builtin descriptions.
> 
> gcc/testsuite/
>   * gcc.target/powerpc/builtins-1-p10-runnable.c: New test file.
> ---
>  gcc/config/rs6000/altivec.h   |   5 +
>  gcc/config/rs6000/altivec.md  |   2 -
>  gcc/config/rs6000/rs6000-builtin.def  |  22 +
>  gcc/config/rs6000/rs6000-call.c   |  49 +++
>  gcc/config/rs6000/rs6000.md   |   3 +-
>  gcc/config/rs6000/vsx.md  | 213 +++---
>  gcc/doc/extend.texi   | 120 ++
>  .../powerpc/builtins-1-p10-runnable.c | 398
> ++
>  8 files changed, 759 insertions(+), 53 

Re: [PATCH v2] rs6000, vector integer multiply/divide/modulo instructions

2020-12-02 Thread will schmidt via Gcc-patches
On Tue, 2020-12-01 at 15:48 -0800, Carl Love via Gcc-patches wrote:
> Segher, Pat:
> 
> I have updated the patch to address the comments below.

In all the excitement, I've lost track of some of the details throughout the 
thread.  :-) 


Subject: Re: [PATCH v2] rs6000, vector integer multiply/divide/modulo 
instructions

This is at least now V3.


Given the number of changes, it may be worth re-posting as a clean [v3] version,
etc.

> 
> On Wed, 2020-11-25 at 20:30 -0600, Segher Boessenkool wrote:
> > On Tue, Nov 24, 2020 at 08:34:51PM -0600, Pat Haugen wrote:
> > > On 11/24/20 8:17 PM, Pat Haugen via Gcc-patches wrote:
> > > > On 11/24/20 12:59 PM, Carl Love via Gcc-patches wrote:
> > > > > +(define_insn "modu_"
> > > > > +  [(set (match_operand:VIlong 0 "vsx_register_operand" "=v")
> > > > > + (umod:VIlong (match_operand:VIlong 1
> > > > > "vsx_register_operand" "v")
> > > > > +  (match_operand:VIlong 2
> > > > > "vsx_register_operand" "v")))]
> > > > > +  "TARGET_POWER10"
> > > > > +  "vmodu %0,%1,%2"
> > > > > +  [(set_attr "type" "vecdiv")
> > > > > +   (set_attr "size" "128")])
> > > > 
> > > > We should only be setting "size" "128" for instructions that
> > > > operate on scalar 128-bit data items (i.e. 'vdivesq' etc). Since
> > > > the above insns are either V2DI/V4SI (ala VIlong mode_iterator),
> > > > they shouldn't be marked as size 128. If you want to set the size
> > > > > based on mode, (set_attr "size" "<bits>") should do the trick I
> > > > believe.
> > > 
> > > Well, after you update "(define_mode_attr bits" in rs6000.md for
> > > V2DI/V4SI.
> > 
> > > So far, <bits> was only used for scalars.  I agree that for vectors
> > it
> > makes most sense to do the element size (because the vector size
> > always
> > is 128 bits, and for scheduling the element size can matter).  But,
> > the
> > > definitions of "size" and <bits> now say
> > 
> > ;; What data size does this instruction work on?
> > ;; This is used for insert, mul and others as necessary.
> > (define_attr "size" "8,16,32,64,128" (const_string "32"))
> > 
> > and
> > 
> > ;; How many bits in this mode?
> > (define_mode_attr bits [(QI "8") (HI "16") (SI "32") (DI "64")
> >(SF "32") (DF "64")])
> > so those need a bit of update as well then :-)
> 
> I set the size based on the vector element size, extendeing the
> define_mode_attr bits definition.  Please take a look at the updated
> patch.  Hopefully I have this all correct.  Thanks.


It would be useful to include the patch description as a standalone paragraph 
here.  

I believe the first email in the thread contained this:

> GCC maintainers:
> 
> The following patch adds new builtins for the vector integer multiply,
> divide and modulo operations.  The builtins are:  
> vec_mulh(), vec_div(), vec_dive(), vec_mod() for signed and unsigned
> integers and long long integers.  Support for signed and unsigned long
> long integers the exiting vec_mul() is added.  Note that the existing
> support for the vec_div()and vec_mul() builtins emulate the vector
> operations with multiple scalar instructions.  This patch adds support
> for these builtins to use the new vector instructions.
> 

I don't see an updated in-between version. 
Nit: ".. exiting vec_mul() is added" doesn't read quite right.
First and last sentences there can probably be combined.





> 
> Note, I retested the updated patch on 
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE)
>   powerpc64le-unknown-linux-gnu (Power 10 LE)
> 
> Thanks for the help.
> 
>  Carl 
> 
> ---
> rs6000, vector integer multiply/divide/modulo instructions
> 
> 2020-12-01  Carl Love  
> 
> gcc/
>   * config/rs6000/altivec.h (vec_mulh, vec_div, vec_dive, vec_mod): New
>   defines.
>   * config/rs6000/altivec.md (VIlong): Move define to file vsx.md.
>   * config/rs6000/rs6000-builtin.def (DIVES_V4SI, DIVES_V2DI,
>   DIVEU_V4SI, DIVEU_V2DI, DIVS_V4SI, DIVS_V2DI, DIVU_V4SI,
>   DIVU_V2DI, MODS_V2DI, MODS_V4SI, MODU_V2DI, MODU_V4SI,
>   MULHS_V2DI, MULHS_V4SI, MULHU_V2DI, MULHU_V4SI, MULLD_V2DI):
>   Add builtin define.
>   (MULH, DIVE, MOD):  Add new BU_P10_OVERLOAD_2 definitions.
>   * config/rs6000/rs6000-call.c (VSX_BUILTIN_VEC_DIV,
>   P10_BUILTIN_VEC_VDIVE, P10_BUILTIN_VEC_VMOD, P10_BUILTIN_VEC_VMULH):
>   New overloaded definitions.
>   (builtin_function_type) [P10V_BUILTIN_DIVEU_V4SI,
>   P10V_BUILTIN_DIVEU_V2DI, P10V_BUILTIN_DIVU_V4SI,
>   P10V_BUILTIN_DIVU_V2DI, P10V_BUILTIN_MODU_V2DI,
>   P10V_BUILTIN_MODU_V4SI, P10V_BUILTIN_MULHU_V2DI,
>   P10V_BUILTIN_MULHU_V4SI, P10V_BUILTIN_MULLD_V2DI]: Add case
>   statement for builtins.
>   * config/rs6000/vsx.md (VIlong_char): Add define_mod_attribute.
>   (UNSPEC_VDIVES, UNSPEC_VDIVEU): Add enum for UNSPECs.
>   (vsx_mul_v2di, vsx_udiv_v2di): Add if TARGET_POWER10 statement.
>   (dives_, 

Re: [PATCH] rs6000: Use subreg for QI/HI vector init

2020-12-02 Thread will schmidt via Gcc-patches
On Wed, 2020-12-02 at 17:44 +0800, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> This patch is to use paradoxical subreg instead of
> zero_extend for promoting QI/HI to SI/DI when we
> want to construct one vector with these modes.
> Since we do the gpr->vsx movement and vector merge
> or pack later, the high part is useless and safe to
> use paradoxical subreg.  It can avoid useless rlwinms
> generated for signed cases.
> 
> Bootstrapped/regtested on powerpc64le-linux-gnu P9.
> 
> Is it ok for trunk?

Mostly cosmetic review.  comments sprinkled below.
thanks
-Will

> 
> BR,
> Kewen
> --
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000.c (rs6000_expand_vector_init): Use
>   paradoxical subreg instead of zero_extend for QI/HI promotion
>   when doing QI/HI vector init.

A bit long, but OK with me. :-)

> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr96933-1.c: Adjusted to check no rlwinm.
>   * gcc.target/powerpc/pr96933-2.c: Likewise.

Ok.  (I'd hope a few more extend instructions would be eliminated, but
this only covers the tests that explicitly looked/didn't look for them, so OK).



> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index f33fca3982a..9c084b055b8 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -6793,17 +6793,8 @@ rs6000_expand_vector_init (rtx target, rtx vals)

I note that the code changes that follow here are within the code block 

  if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
{

This is implied per the patch description, but not obvious from the context of 
the changes here.   (OK).


>/* Force the values into word_mode registers.  */
>for (i = 0; i < n_elts; i++)
> {
> - rtx tmp = force_reg (GET_MODE_INNER (mode), XVECEXP (vals, 0, i));
> - if (TARGET_POWERPC64)
> -   {
> - op[i] = gen_reg_rtx (DImode);
> - emit_insn (gen_zero_extendqidi2 (op[i], tmp));
> -   }
> - else
> -   {
> - op[i] = gen_reg_rtx (SImode);
> - emit_insn (gen_zero_extendqisi2 (op[i], tmp));
> -   }
> + rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
> + op[i] = simplify_gen_subreg (Pmode, tmp, inner_mode, 0);
> }
> 
>/* Take unsigned char big endianness on 64bit as example for below
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr96933-1.c 
> b/gcc/testsuite/gcc.target/powerpc/pr96933-1.c
> index 3b63865b3b8..71d72084413 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr96933-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr96933-1.c
> @@ -13,3 +13,4 @@
>  /* { dg-final { scan-assembler-times {\mvpkudum\M} 12 } } */
>  /* { dg-final { scan-assembler-not {\mstb\M} } } */
>  /* { dg-final { scan-assembler-not {\msth\M} } } */
> +/* { dg-final { scan-assembler-not {\mrlwinm\M} } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr96933-2.c 
> b/gcc/testsuite/gcc.target/powerpc/pr96933-2.c
> index cef8fbd4f35..9fa15125d8d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr96933-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr96933-2.c
> @@ -13,3 +13,4 @@
>  /* { dg-final { scan-assembler-times {\mxxpermdi\M} 4 } } */
>  /* { dg-final { scan-assembler-not {\mstb\M} } } */
>  /* { dg-final { scan-assembler-not {\msth\M} } } */
> +/* { dg-final { scan-assembler-not {\mrlwinm\M} } } */

Ok.
Thanks
-Will





Re: [PATCH] PowerPC: Restrict long double test to use IBM long double.

2020-11-18 Thread will schmidt via Gcc-patches
On Wed, 2020-11-18 at 01:03 -0500, Michael Meissner wrote:
> On Tue, Nov 17, 2020 at 11:33:29PM -0600, will schmidt wrote:
> > On Sun, 2020-11-15 at 12:23 -0500, Michael Meissner via Gcc-patches 
> > wrote:
> > > PowerPC: Restrict long double test to use IBM long double.
> > > 
> > > I posted this patch previously as a set of 3 testsuite
> > > patches.  I have
> > > separated them into separate patches.  This patch marks the
> > > convert-bfp-11.c
> > > patch as needing IBM extended double.  If you look at the code,
> > > it is
> > > specifically designed around testing the limits of the IBM 128-
> > > bit extended
> > > double representation.  I added a new target-supports that says
> > > the test
> > > requires IBM extended long double, and changed the test to
> > > require this
> > > effective test.  Can I check this into the master branch?
> > 
> > 
> > It's harder to review that without all the history handy here.
> > 
> > This will stand alone better if you lead with what you are adding
> > and
> > keep it clean.  i.e.
> 
> The patch I was referring to was posted on October 22nd:
> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/556865.html
> 
> > Subject: PowerPC: Add ppc_long_double_ibm effective-target check
> > 
> > "Add a ppc_long_double_ibm dg-require-effective-target check to
> > ensure
> > tests that require LONG_DOUBLE_IBM128 . "
> > An additional statement to clarify it's relationship with
> > I128
> > wouldn't  hurt if that is the case.  i.e. 
> > "This is a counterpart to LONG_DOUBLE_IEEE 128 " 
> 
> At the moment, we don't need a target supports for long double IEEE
> 128-bit or
> long double 64-bit.  I can add them if needed.

I would probably add one for each of the three so you have the complete
picture of what is going on.

> 
> > Hmm, I have those backwards in my head apparently.  Can the return
> > 1 if
> > not-defined logic be flattened out so we see the direct
> > relationship?
> 
> I'm not sure what you are asking.  These are preprocessor macros that
> are only
> defined in certain cases.  And remember this is main returning a
> value, so
> returning 0 is true and 1 is false.
> 
> In particular:
> 

This:

> If your long double is 128-bits and uses the IEEE 128-bit
> representation, the
> following macros are defined:
> 
>   __LONG_DOUBLE_128__
>   __LONG_DOUBLE_IEEE128__
> 
> If your long double is 128-bit and uses the IBM 128-bit
> representation (current
> default0, the following macros are defined:
> 
>   __LONG_DOUBLE_128__
>   __LONG_DOUBLE_IBM128__
> 
> If your long double is 64 bits, neither of those two macros are
> defined.
> 

.. clearly defines what is going on, and would be good to add as a
comment in/around where the checks are defined.

That's my perspective, of course...  :-)
thanks
-Will




Re: [PATCH] Include math.h in nextafter-2.c test.

2020-11-18 Thread will schmidt via Gcc-patches
On Wed, 2020-11-18 at 00:55 -0500, Michael Meissner wrote:
> On Tue, Nov 17, 2020 at 11:33:23PM -0600, will schmidt wrote:
> > On Sun, 2020-11-15 at 12:12 -0500, Michael Meissner via Gcc-patches 
> > wrote:
> > > Include math.h in nextafter-2.c test.
> > > 
> > > I previously posted this with two other patches.  I've separated
> > > this into its
> > > own patch.  What happens is because the nextafter-2.c test uses
> > > -fno-builtin,
> > > and it does not include math.h, the wrong nextafterl and
> > > nextforwardl gets
> > > called when long double is not IBM 128-bit (i.e. either 64-bit,
> > > or IEEE
> > > 128-bit).
> > 
> > Thats a sandbox issue, or something upstream ?
> 
> I'm not sure what you are asking.  If you install the three critical
> IEEE
> 128-bit long double patches, and then configure a build with long
> double
> defaulting to IEEE 128-bit, the nextafter-2 test will fail.

That answers my question.. this fixes an issue with patches that are
not upstream yet.  (your sandbox). 

> 
> The reason is the nextafterl function in GLIBC assumes long double is
> IBM
> 128-bit extended double.  The __builtin_nextafterl function calls
> that
> function.
> 
> If you compile it normally (with long double using IEEE 128-bit), the
> compiler
> will automatically map nextafterl to __nextafterieee128.
> 
> Similarly if you include math.h, and use the -fno-builtin option, the
> math.h
> library will still map nextafterl into __nextafterieee128, and the
> compiler
> will call it.
> 
> However, if you do not include math.h and use the -fno-builtin
> option, the
> compiler will call nextafterl, and get the wrong results, because the
> wrong
> function was called.
> 
> What I meant in terms of the 3 patches being separated, the last time
> I posted
> a patch for this problem, I grouped together 3 test suite failures
> into one
> patch.  This time, I separated the cases into 3 separate patches
> (this one, the
> fix for pr70117, and the fix for the decimal conversion test).
> 
> > > 
> > > Rather than add the include only for the PowerPC, I thought it
> > > was better to
> > > always include it.  There might be some port in the future that
> > > has the same
> > > issue with multiple long double types without using multilibs.
> > > 
> > > Can I check this into the master branch.
> > > 
> > > 2020-11-15  Michael Meissner  
> > > 
> > >   * gcc.dg/nextafter-2.c: Include math.h.
> > > ---
> > >  gcc/testsuite/gcc.dg/nextafter-2.c | 12 
> > >  1 file changed, 12 insertions(+)
> > > 
> > > diff --git a/gcc/testsuite/gcc.dg/nextafter-2.c
> > > b/gcc/testsuite/gcc.dg/nextafter-2.c
> > > index e51ae94be0c..8149a709fa5 100644
> > > --- a/gcc/testsuite/gcc.dg/nextafter-2.c
> > > +++ b/gcc/testsuite/gcc.dg/nextafter-2.c
> > > @@ -6,6 +6,18 @@
> > > 
> > >  #include 
> > > 
> > > +/* In order to run on systems like the PowerPC that have 3
> > > different long
> > > +   double types, include math.h so it can choose what is the
> > > appropriate
> > > +   nextafterl function to use.
> > > +
> > > +   If we didn't use -fno-builtin for this test, the PowerPC
> > > compiler would have
> > > +   changed the names of the built-in functions that use long
> > > double.  The
> > > +   nextafter-1.c function runs with this mapping.
> > > +
> > > +   Since this test uses -fno-builtin, include math.h, so that
> > > math.h can make
> > > +   the appropriate choice to use.  */
> > 
> > 
> > 
> > Can this be simplified to stl
> > 
> > /* Include math.h so that systems like PowerPC that have different
> > long
> > double types can choose the appropriate nextafterl function to
> > use.  */
> > 
> > 
> > > > +#include <math.h>
> > > +
> > >  #if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
> > >  # if !__GLIBC_PREREQ (2, 24)
> > >  /* Workaround buggy nextafterl in glibc 2.23 and earlier,
> > > -- 
> > > 2.22.0
> > > 
> > > 
> 
> Sure, the comment is just trying to explain why math.h needs to be
> included.

Ok.   Your first paragraph in the comment clarifies that.  I'm
uncertain the rest of the comment helps, but I'll defer. 
Thanks. 

> 



Re: [PATCH] PowerPC Fix ibm128 defaults for pr70117.c test.

2020-11-17 Thread will schmidt via Gcc-patches
On Sun, 2020-11-15 at 12:17 -0500, Michael Meissner via Gcc-patches wrote:
> From 698d9fd8a5701fa4ed9690ddf71d57765921778c Mon Sep 17 00:00:00 2001
> From: Michael Meissner 
> Date: Sun, 15 Nov 2020 00:48:23 -0500
> Subject: [PATCH] PowerPC Fix ibm128 defaults for pr70117.c test.
> 
> This patch was previously posted as a combined patch with 2 other testsuite
> patches.  I moved it to a separate patch.

I don't see that thread.  Either really old or differently named ?

> 
> This patch fixes up a failure that I saw when I built a compiler with the long
> double default set to IEEE 128-bit instead of IBM 128-bit.  Now compilers with
> either 128-bit long double default pass this test.  Can I check this into the
> master branch?

sandbox or upstream failure?
Perhaps stands alone better as 
" This patch updates the pr70117.c testcase to define IBM128_MAX as
appropriate for the IBM 128 or IEEE 128 type that is currently in use."
?


> 
> gcc/testsuite/
> 2020-11-15  Michael Meissner  
> 
>   PR target/70117
>   * gcc.target/powerpc/pr70117.c: Add support for long double being
>   IEEE 128-bit.
> ---
>  gcc/testsuite/gcc.target/powerpc/pr70117.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c 
> b/gcc/testsuite/gcc.target/powerpc/pr70117.c
> index 3bbd2c595e0..928efe39c7b 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr70117.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
> @@ -9,9 +9,11 @@
> 128-bit floating point, because the type is not enabled on those
> systems.  */
>  #define LDOUBLE __ibm128
> +#define IBM128_MAX ((__ibm128) 1.79769313486231580793728971405301199e+308L)
> 
>  #elif defined(__LONG_DOUBLE_IBM128__)
>  #define LDOUBLE long double
> +#define IBM128_MAX LDBL_MAX
> 


>  #else
>  #error "long double must be either IBM 128-bit or IEEE 128-bit"
> @@ -75,10 +77,10 @@ main (void)
>if (__builtin_isnormal (ld))
>  __builtin_abort ();
> 
> -  ld = LDBL_MAX;
> +  ld = IBM128_MAX;
>if (!__builtin_isnormal (ld))
>  __builtin_abort ();
> -  ld = -LDBL_MAX;
> +  ld = -IBM128_MAX;
>if (!__builtin_isnormal (ld))
>  __builtin_abort ();
> 
> -- 
> 2.22.0
> 
> 



Re: [PATCH] Include math.h in nextafter-2.c test.

2020-11-17 Thread will schmidt via Gcc-patches
On Sun, 2020-11-15 at 12:12 -0500, Michael Meissner via Gcc-patches wrote:
> Include math.h in nextafter-2.c test.
> 
> I previously posted this with two other patches.  I've separated this into its
> own patch.  What happens is because the nextafter-2.c test uses -fno-builtin,
> and it does not include math.h, the wrong nextafterl and nextforwardl gets
> called when long double is not IBM 128-bit (i.e. either 64-bit, or IEEE
> 128-bit).

That's a sandbox issue, or something upstream?

> 
> Rather than add the include only for the PowerPC, I thought it was better to
> always include it.  There might be some port in the future that has the same
> issue with multiple long double types without using multilibs.
> 
> Can I check this into the master branch.
> 
> 2020-11-15  Michael Meissner  
> 
>   * gcc.dg/nextafter-2.c: Include math.h.
> ---
>  gcc/testsuite/gcc.dg/nextafter-2.c | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/gcc/testsuite/gcc.dg/nextafter-2.c 
> b/gcc/testsuite/gcc.dg/nextafter-2.c
> index e51ae94be0c..8149a709fa5 100644
> --- a/gcc/testsuite/gcc.dg/nextafter-2.c
> +++ b/gcc/testsuite/gcc.dg/nextafter-2.c
> @@ -6,6 +6,18 @@
> 
>  #include 
> 
> +/* In order to run on systems like the PowerPC that have 3 different long
> +   double types, include math.h so it can choose what is the appropriate
> +   nextafterl function to use.
> +
> +   If we didn't use -fno-builtin for this test, the PowerPC compiler would 
> have
> +   changed the names of the built-in functions that use long double.  The
> +   nextafter-1.c function runs with this mapping.
> +
> +   Since this test uses -fno-builtin, include math.h, so that math.h can make
> +   the appropriate choice to use.  */



Can this be simplified to something like

/* Include math.h so that systems like PowerPC that have different long
double types can choose the appropriate nextafterl function to use.  */


> +#include 
> +
>  #if defined(__GLIBC__) && defined(__GLIBC_PREREQ)
>  # if !__GLIBC_PREREQ (2, 24)
>  /* Workaround buggy nextafterl in glibc 2.23 and earlier,
> -- 
> 2.22.0
> 
> 



Re: [PATCH] PowerPC: Restrict long double test to use IBM long double.

2020-11-17 Thread will schmidt via Gcc-patches
On Sun, 2020-11-15 at 12:23 -0500, Michael Meissner via Gcc-patches wrote:
> PowerPC: Restrict long double test to use IBM long double.
> 
> I posted this patch previously as a set of 3 testsuite patches.  I have
> separated them into separate patches.  This patch marks the convert-bfp-11.c
> patch as needing IBM extended double.  If you look at the code, it is
> specifically designed around testing the limits of the IBM 128-bit extended
> double representation.  I added a new target-supports that says the test
> requires IBM extended long double, and changed the test to require this
> effective test.  Can I check this into the master branch?


It's harder to review that without all the history handy here.

This will stand alone better if you lead with what you are adding and
keep it clean.  i.e.

Subject: PowerPC: Add ppc_long_double_ibm effective-target check

"Add a ppc_long_double_ibm dg-require-effective-target check to ensure
tests that require LONG_DOUBLE_IBM128 . "
An additional statement to clarify its relationship with I128
wouldn't  hurt if that is the case.  i.e. 
"This is a counterpart to LONG_DOUBLE_IEEE 128 " 


> 
> gcc/testsuite/
> 2020-11-15  Michael Meissner  
> 
>   * c-c++-common/dfp/convert-bfp-11.c: Require IBM 128-bit long
>   double.
>   * lib/target-supports.exp (check_ppc_long_double_ibm): New
>   function.
>   (is-effective-target): Add ppc_long_double_ibm.




> ---
>  .../c-c++-common/dfp/convert-bfp-11.c |  1 +
>  gcc/testsuite/lib/target-supports.exp | 19 +++
>  2 files changed, 20 insertions(+)
> 
> diff --git a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c 
> b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
> index 95c433d2c24..87f6716afb3 100644
> --- a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
> +++ b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
> @@ -1,4 +1,5 @@
>  /* { dg-skip-if "" { ! "powerpc*-*-linux*" } } */
> +/* { dg-require-effective-target ppc_long_double_ibm } */
> 
>  /* Test decimal float conversions to and from IBM 128-bit long double. 
> Checks are skipped at runtime if long double is not 128 bits.

ok

> diff --git a/gcc/testsuite/lib/target-supports.exp 
> b/gcc/testsuite/lib/target-supports.exp
> index ceee78c26a9..dc1100ba96c 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -2336,6 +2336,24 @@ proc check_effective_target_ppc_ieee128_ok { } {
>  }]
>  }
> 
> +# Return 1 if the target is a powerpc with the long double format uses the 
> IBM
> +# extended double format.
> +
> +proc check_ppc_long_double_ibm { } {
> +return [check_cached_effective_target ppc_long_double_ibm {
> + check_runtime_nocache ppc_long_double_ibm {
> + int main()
> + {
> +   #ifndef __LONG_DOUBLE_IBM128__
> + return 1;


Hmm, I have those backwards in my head apparently.  Can the return 1 if
not-defined logic be flattened out so we see the direct relationship?


> +   #else
> + return 0;
> +   #endif
> + }
> + }
> +}]
> +}




> +
>  # Return 1 if the target supports executing VSX instructions, 0
>  # otherwise.  Cache the result.
> 
> @@ -7939,6 +7957,7 @@ proc is-effective-target { arg } {
> "power10_hw" { set selected [check_power10_hw_available] }
> "ppc_float128_sw" { set selected [check_ppc_float128_sw_available] }
> "ppc_float128_hw" { set selected [check_ppc_float128_hw_available] }
> +   "ppc_long_double_ibm" { set selected [check_ppc_long_double_ibm] }
> "ppc_recip_hw"   { set selected [check_ppc_recip_hw_available] }
> "ppc_cpu_supports_hw" { set selected 
> [check_ppc_cpu_supports_hw_available] }
> "ppc_mma_hw" { set selected [check_ppc_mma_hw_available] }
> -- 
> 2.22.0
> 
> 



Re: [PATCH, rs6000] Add Power10 scheduling description

2020-11-17 Thread will schmidt via Gcc-patches
On Fri, 2020-11-13 at 16:04 -0600, Pat Haugen via Gcc-patches wrote:
> Add Power10 scheduling description.
> 
> This patch adds the Power10 scheduling description. Since power10.md
> was pretty much a complete rewrite (existing version of power10.md is
> mostly just a copy of power9.md), I diffed power10.md with /dev/null
> so that the full contents of the file are shown as opposed to a diff.
> This should make it easier to read. This patch will not apply on
> current trunk do to that reason.
> 
> Bootstrap/regtest on powerpc64le (Power8/Power10) with no new
> regressions. Ok for trunk?
> 
> -Pat


(reviewing in 2 parts, another follows this one with bulk of the
attachment inline..)


> 
> 
> 2020-11-13  Pat Haugen  
> 
> gcc/
>   * config/rs6000/rs6000.c (struct processor_costs): New.

Should that add/reference the "power10_cost" structure itself?

>   (rs6000_option_override_internal): Set Power10 costs.

ok


>   (rs6000_issue_rate): Set Power10 issue rate.

ok


>   * config/rs6000/power10.md: Rewrite for Power10.
> 

+;; Copyright (C) 2020-2020 Free Software Foundation, Inc.

Nit: Can probably just be a single 2020 reference.  :-)


thanks,
-Will



Re: [PATCH, rs6000] Add Power10 scheduling description

2020-11-17 Thread will schmidt via Gcc-patches
On Fri, 2020-11-13 at 16:04 -0600, Pat Haugen via Gcc-patches wrote:
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 4d528a39a37..85bb42d6dce 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1080,6 +1080,26 @@ struct processor_costs power9_cost = {
>COSTS_N_INSNS (3), /* SF->DF convert */
>  };
>  
> +/* Instruction costs on POWER10 processors.  */
> +static const
> +struct processor_costs power10_cost = {
> +  COSTS_N_INSNS (1), /* mulsi */
> +  COSTS_N_INSNS (1), /* mulsi_const */
> +  COSTS_N_INSNS (1), /* mulsi_const9 */
> +  COSTS_N_INSNS (1), /* muldi */
> +  COSTS_N_INSNS (4), /* divsi */
> +  COSTS_N_INSNS (4), /* divdi */
> +  COSTS_N_INSNS (2), /* fp */
> +  COSTS_N_INSNS (2), /* dmul */
> +  COSTS_N_INSNS (7), /* sdiv */
> +  COSTS_N_INSNS (9), /* ddiv */
> +  128,   /* cache line size */
> +  32,/* l1 cache */
> +  512,   /* l2 cache */
> +  16,/* prefetch streams */
> +  COSTS_N_INSNS (2), /* SF->DF convert */
> +};
> +


ok

>  /* Instruction costs on POWER A2 processors.  */
>  static const
>  struct processor_costs ppca2_cost = {
> @@ -4734,10 +4754,13 @@ rs6000_option_override_internal (bool global_init_p)
>   break;
>  
>case PROCESSOR_POWER9:
> -  case PROCESSOR_POWER10:
>   rs6000_cost = _cost;
>   break;
>  
> +  case PROCESSOR_POWER10:
> + rs6000_cost = _cost;
> + break;
> +
>case PROCESSOR_PPCA2:
>   rs6000_cost = _cost;
>   break;
> @@ -18001,8 +18024,9 @@ rs6000_issue_rate (void)
>case PROCESSOR_POWER8:
>  return 7;
>case PROCESSOR_POWER9:
> -  case PROCESSOR_POWER10:
>  return 6;
> +  case PROCESSOR_POWER10:
> +return 8;
>default:
>  return 1;
>}

ok


> diff --git a/gcc/config/rs6000/power10.md b/gcc/config/rs6000/power10.md
> new file mode 100644
> index 000..f9ca4cbf10e
> --- /dev/null
> +++ b/gcc/config/rs6000/power10.md
> @@ -0,0 +1,553 @@
> +;; Scheduling description for the IBM POWER10 processor.
> +;; Copyright (C) 2020-2020 Free Software Foundation, Inc.
> +;;
> +;; Contributed by Pat Haugen (pthau...@us.ibm.com).
> +
> +;; This file is part of GCC.
> +;;
> +;; GCC is free software; you can redistribute it and/or modify it
> +;; under the terms of the GNU General Public License as published
> +;; by the Free Software Foundation; either version 3, or (at your
> +;; option) any later version.
> +;;
> +;; GCC is distributed in the hope that it will be useful, but WITHOUT
> +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> +;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> +;; License for more details.
> +;;
> +;; You should have received a copy of the GNU General Public License
> +;; along with GCC; see the file COPYING3.  If not see
> +;; .
> +
> +; For Power10 we model (and try to pack) the in-order decode/dispatch groups
> +; which consist of 8 instructions max.  We do not try to model the details of
> +; the out-of-order issue queues and how insns flow to the various execution
> +; units except for the simple representation of the issue limitation of at
> +; most 4 insns to the execution units/2 insns to the load units/2 insns to
> +; the store units.
> +(define_automaton "power10dsp,power10issue,power10div")
> +
> +; Decode/dispatch slots
> +(define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
> +   du4_power10,du5_power10,du6_power10,du7_power10" "power10dsp")
> +
> +; Four execution units
> +(define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
> +  "power10issue")
> +; Two load units and two store units
> +(define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
> +(define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
> +; Create false units for use by non-pipelined div/sqrt
> +(define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10div")
> +(define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,
> +   fp_div3_power10" "power10div")

The spacing catches my eye, I'd want to add spaces around those commas,
etc.   But.. this appears to be consistent with behavior
as seen in the
existing power9.md ; power9.md ; etc. 
So it's either this way per necessity, or this way per history.
Either way, no change requested here given that precedent.
(If this and
the older stuff also needs to be cosmetically tweaked, that can be
handled later on..)


> +
> +
> +; Dispatch slots are allocated in order conforming to program order.
> +(absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
> +  du5_power10,du6_power10,du7_power10")
> +(absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
> +  du6_power10,du7_power10")
> +(absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
> +   

Re: [PATCH, rs6000] Update instruction attributes for Power10

2020-11-06 Thread will schmidt via Gcc-patches
On Fri, 2020-11-06 at 10:46 -0600, Pat Haugen wrote:
> On 11/5/20 4:32 PM, will schmidt wrote:
> > On Wed, 2020-11-04 at 14:42 -0600, Pat Haugen via Gcc-patches
> > wrote:
> > >   * config/rs6000/rs6000.c (rs6000_final_prescan_insn): Only add
> > > 'p' for
> > >   PREFIXED_YES.
> > 
> > The code change reads as roughly 
> > - next_insn_prefixed_p != PREFIXED_NO
> > 
> > + next_insn_prefixed_p == PREFIXED_YES"
> > 
> > So just an inversion of the logic? I don't obviously see the 'p'
> > impact
> > there.
> > 
> 
> It's no longer an inversion of the logic since I added a
> PREFIXED_ALWAYS value. 'next_insn_prefixed' is used by
> rs6000_final_prescan_insn() to determine whether an insn mnemonic
> needs a 'p' prefix. We want it set for PREFIXED_YES, but not for
> PREFIXED_NO or PREFIXED_ALWAYS.

Ok.  So the next_insn_prefixed_p indicates whether the instruction
has/gets/needs a p prefix.  gotcha.  thanks for clarifying.  :-)

thanks
-will


> 
> > 
> > >   * config/rs6000/rs6000.md (define_attr "size"): Add 256.
> > >   (define_attr "prefixed"): Add 'always'.
> > >   (define_mode_attr bits): Add DD/TD modes.
> > >   (cfuged, cntlzdm, cnttzdm, pdepd, pextd, bswaphi2_reg,
> > > bswapsi2_reg,
> > >   bswapdi2_brd, setbc_signed_,
> > >   *setbcr_signed_, *setnbc_signed_,
> > >   *setnbcr_signed_): Update instruction attributes
> > > for
> > >   Power10.
> > 
> > ok.  (assuming the assorted 'integer' -> 'crypto' changes are
> > correct,
> > of course).  
> > 
> 
> Yes, crypto represents the correct pipe the insns are executed on.
> 
> Thanks for the review,
> Pat
> 



Re: [PATCH,rs6000] Add patterns for combine to support p10 fusion

2020-11-05 Thread will schmidt via Gcc-patches
On Wed, 2020-11-04 at 12:12 -0600, Aaron Sawdey via Gcc-patches wrote:
> Ping.
> 
> Aaron Sawdey, Ph.D. saw...@linux.ibm.com
> IBM Linux on POWER Toolchain
>  
> 
> > On Oct 26, 2020, at 4:44 PM, acsaw...@linux.ibm.com wrote:
> > 
> > From: Aaron Sawdey 
> > 

Hi, 

> > This patch adds the first couple patterns to support p10 fusion. These
> > will allow combine to create a single insn for a pair of instructions
> > that that power10 can fuse and execute. These particular ones have the

that the power10

s/particular ones/particular insns/ 

> > requirement that only cr0 can be used when fusing a load with a compare
> > immediate of -1/0/1, so we want combine to put that requirement in, and
> > if it doesn't work out later the splitter can get used.
> > 
> > This also adds option -mpower10-fusion which defaults on for power10 and
> > will gate all these fusion patterns. In addition I have added an
> > undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
> > that just controls the load+compare-immediate patterns.

ok

> >  I have make

made

> > these default on for power10 but they are not disallowed for earlier

to on

> > processors because it is still valid code. This allows us to test the
> > correctness of fusion code generation by turning it on explicitly.
> > 
> > The intention is to work through more patterns of this style to support
> > the rest of the power10 fusion pairs.
> > 
> > Bootstrap and regtest looks good on ppc64le power9 with these patterns
> > enabled in stage2/stage3 and for regtest. Ok for trunk?
> > 
> > gcc/ChangeLog:
> > 
> > * config/rs6000/predicates.md: Add const_m1_to_1_operand.
> > * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
> > OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.

to ... and OTHER_P9_VECTOR_MASKS

> > * config/rs6000/rs6000-protos.h (address_ok_for_form): Add
> > prototype.



> > * config/rs6000/rs6000.c (rs6000_option_override_internal):
> > automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
> > if target is power10.  (rs6000_opt_masks): Allow -mpower10-fusion
> > in function attributes.  (address_ok_for_form): New function.

ok


> > * config/rs6000/rs6000.h: Add MASK_P10_FUSION.

> > * config/rs6000/rs6000.md (*ld_cmpi_cr0): New
> > define_insn_and_split.
> > (*lwa_cmpdi_cr0): New define_insn_and_split.
> > (*lwa_cmpwi_cr0): New define_insn_and_split.


> > * config/rs6000/rs6000.opt: Add -mpower10-fusion
> > and -mpower10-fusion-ld-cmpi.
> > ---
> > gcc/config/rs6000/predicates.md   |  5 +++
> > gcc/config/rs6000/rs6000-cpus.def |  6 ++-
> > gcc/config/rs6000/rs6000-protos.h |  2 +
> > gcc/config/rs6000/rs6000.c| 34 
> > gcc/config/rs6000/rs6000.h|  1 +
> > gcc/config/rs6000/rs6000.md   | 68 +++
> > gcc/config/rs6000/rs6000.opt  |  8 
> > 7 files changed, 123 insertions(+), 1 deletion(-)
> > 
> > diff --git a/gcc/config/rs6000/predicates.md 
> > b/gcc/config/rs6000/predicates.md
> > index 4c2fe7fa312..b75c1ddfb69 100644
> > --- a/gcc/config/rs6000/predicates.md
> > +++ b/gcc/config/rs6000/predicates.md
> > @@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
> >   (and (match_code "const_int")
> >(match_test "IN_RANGE (INTVAL (op), 0, 1)")))
> > 
> > +;; Match op = -1, op = 0, or op = 1.
> > +(define_predicate "const_m1_to_1_operand"
> > +  (and (match_code "const_int")
> > +   (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
> > +
> > ;; Match op = 0..3.
> > (define_predicate "const_0_to_3_operand"
> >   (and (match_code "const_int")

ok

> > diff --git a/gcc/config/rs6000/rs6000-cpus.def 
> > b/gcc/config/rs6000/rs6000-cpus.def
> > index 8d2c1ffd6cf..3e65289d8df 100644
> > --- a/gcc/config/rs6000/rs6000-cpus.def
> > +++ b/gcc/config/rs6000/rs6000-cpus.def
> > @@ -82,7 +82,9 @@
> > 
> > #define ISA_3_1_MASKS_SERVER(ISA_3_0_MASKS_SERVER   
> > \
> >  | OPTION_MASK_POWER10  \
> > -| OTHER_POWER10_MASKS)
> > +| OTHER_POWER10_MASKS  \
> > +| OPTION_MASK_P10_FUSION   \
> > +| OPTION_MASK_P10_FUSION_LD_CMPI)
> > 
> > /* Flags that need to be turned off if -mno-power9-vector.  */
> > #define OTHER_P9_VECTOR_MASKS   (OPTION_MASK_FLOAT128_HW
> > \
> > @@ -129,6 +131,8 @@
> >  | OPTION_MASK_FLOAT128_KEYWORD \
> >  | OPTION_MASK_FPRND\
> >  | OPTION_MASK_POWER10  \
> > +| OPTION_MASK_P10_FUSION   \
> > +| OPTION_MASK_P10_FUSION_LD_CMPI   \
> >  | 

Re: [PATCH, rs6000] Update instruction attributes for Power10

2020-11-05 Thread will schmidt via Gcc-patches
On Wed, 2020-11-04 at 14:42 -0600, Pat Haugen via Gcc-patches wrote:
> Update instruction attributes for Power10.
> 
> 
> This patch updates the type/prefixed/dot/size attributes for various new 
> instructions (and a couple existing that were incorrect) in preparation for 
> the Power10 scheduling patch that will be following.
> 
> Bootstrap/regtest on powerpc64le (Power8/Power10) with no new regressions. Ok 
> for trunk?
> 
> -Pat
> 
> 
> 2020-11-04  Pat Haugen  
> 
> gcc/
>   * config/rs6000/altivec.md (vsdb_, xxspltiw_v4si,
>   xxspltiw_v4sf_inst, xxspltidp_v2df_inst, xxsplti32dx_v4si_inst,
>   xxsplti32dx_v4sf_inst, xxblend_, xxpermx_inst,
>   vstrir_code_, vstrir_p_code_, vstril_code_,
>   vstril_p_code_, altivec_lvsl_reg, altivec_lvsl_direct,
>   altivec_lvsr_reg, altivec_lvsr_direct, xxeval, vcfuged, vclzdm,
>   vctzdm, vpdepd, vpextd, vgnb, vclrlb, vclrrb): Update instruction
>   attributes for Power10.
>   * config/rs6000/dfp.md (extendddtd2, trunctddd2, *cmp_internal1,
>   floatditd2, ftrunc2, fixdi2, dfp_ddedpd_,
>   dfp_denbcd_, dfp_dxex_, dfp_diex_,
>   *dfp_sgnfcnc_, dfp_dscli_, dfp_dscri_): Likewise.
>   * config/rs6000/mma.md (*movpoi, mma_, mma_,
>   mma_, mma_, mma_, mma_,
>   mma_, mma_, mma_, mma_):
>   Likewise.
>   * config/rs6000/rs6000.c (rs6000_final_prescan_insn): Only add 'p' for
>   PREFIXED_YES.

The code change reads as roughly 
- next_insn_prefixed_p != PREFIXED_NO

+ next_insn_prefixed_p == PREFIXED_YES"

So just an inversion of the logic? I don't obviously see the 'p' impact
there.


>   * config/rs6000/rs6000.md (define_attr "size"): Add 256.
>   (define_attr "prefixed"): Add 'always'.
>   (define_mode_attr bits): Add DD/TD modes.
>   (cfuged, cntlzdm, cnttzdm, pdepd, pextd, bswaphi2_reg, bswapsi2_reg,
>   bswapdi2_brd, setbc_signed_,
>   *setbcr_signed_, *setnbc_signed_,
>   *setnbcr_signed_): Update instruction attributes for
>   Power10.

ok.  (assuming the assorted 'integer' -> 'crypto' changes are correct,
of course).  

>   * config/rs6000/sync.md (load_quadpti, store_quadpti, load_lockedpti,
>   store_conditionalpti): Update instruction attributes for Power10.
>   * config/rs6000/vsx.md (*xvtlsbb_internal, xxgenpcvm__internal,
>   vextractl_internal, vextractr_internal,
>   vinsertvl_internal_, vinsertvr_internal_,
>   vinsertgl_internal_, vinsertgr_internal_,
>   vreplace_elt__inst): Likewise.


lgtm, 
thanks
-Will

> 



Re: [PATCH, rs6000] Optimize pcrel access of globals (updated, ping)

2020-11-05 Thread will schmidt via Gcc-patches
On Wed, 2020-11-04 at 12:10 -0600, acsawdey--- via Gcc-patches wrote:
> From: Aaron Sawdey 
> 
> Ping, as it has been a while.
> This also includes a slight fix to make sure that all references can get
> optimized.
> 


I've read over what I could.  a few nits below, nothing significant
jumped out at me, also not my area of expertise.  :-)

comments inline below.
thanks
-Will


> This patch implements a RTL pass that looks for pc-relative loads of the
> address of an external variable using the PCREL_GOT relocation and a
> single load or store that uses that external address.
> 
> Produced by a cast of thousands:
>  * Michael Meissner
>  * Peter Bergner
>  * Bill Schmidt
>  * Alan Modra
>  * Segher Boessenkool
>  * Aaron Sawdey
> 
> Passes bootstrap/regtest on ppc64le power10. OK for trunk?

Any impact to non-power10 targets?  (power9,power8, or BE, ...)


> 
> gcc/ChangeLog:
> 
>   * config.gcc: Add pcrel-opt.o.

pcrel-opt.c and pcrel-opt.o entries.


>   * config/rs6000/pcrel-opt.c: New file.
>   * config/rs6000/pcrel-opt.md: New file.
>   * config/rs6000/predicates.md: Add d_form_memory predicate.
>   * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_PCREL_OPT.
>   * config/rs6000/rs6000-passes.def: Add pass_pcrel_opt.
>   * config/rs6000/rs6000-protos.h: Add reg_to_non_prefixed(),
>   offsettable_non_prefixed_memory(), output_pcrel_opt_reloc(),
>   and make_pass_pcrel_opt().
>   * config/rs6000/rs6000.c (reg_to_non_prefixed): Make global.
>   (rs6000_option_override_internal): Add pcrel-opt.
>   (rs6000_delegitimize_address): Support pcrel-opt.
>   (rs6000_opt_masks): Add pcrel-opt.
>   (offsettable_non_prefixed_memory): New function.
>   (reg_to_non_prefixed): Make global.
>   (rs6000_asm_output_opcode): Reset next_insn_prefixed_p.
>   (output_pcrel_opt_reloc): New function.
>   * config/rs6000/rs6000.md (loads_extern_addr): New attr.
>   (pcrel_extern_addr): Set loads_extern_addr.
>   Add include for pcrel-opt.md.
>   * config/rs6000/rs6000.opt: Add -mpcrel-opt.
>   * config/rs6000/t-rs6000: Add rules for pcrel-opt.c and
> pcrel-opt.md.

indent.

> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pcrel-opt-inc-di.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-df.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-di.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-hi.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-qi.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-sf.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-si.c: New test.
>   * gcc.target/powerpc/pcrel-opt-ld-vector.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-df.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-di.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-hi.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-qi.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-sf.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-si.c: New test.
>   * gcc.target/powerpc/pcrel-opt-st-vector.c: New test.
> ---
>  gcc/config.gcc|   6 +-
>  gcc/config/rs6000/pcrel-opt.c | 888 ++
>  gcc/config/rs6000/pcrel-opt.md| 386 
>  gcc/config/rs6000/predicates.md   |  23 +
>  gcc/config/rs6000/rs6000-cpus.def |   2 +
>  gcc/config/rs6000/rs6000-passes.def   |   8 +
>  gcc/config/rs6000/rs6000-protos.h |   4 +
>  gcc/config/rs6000/rs6000.c| 116 ++-
>  gcc/config/rs6000/rs6000.md   |   8 +-
>  gcc/config/rs6000/rs6000.opt  |   4 +
>  gcc/config/rs6000/t-rs6000|   7 +-
>  .../gcc.target/powerpc/pcrel-opt-inc-di.c |  18 +
>  .../gcc.target/powerpc/pcrel-opt-ld-df.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-ld-di.c  |  43 +
>  .../gcc.target/powerpc/pcrel-opt-ld-hi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-qi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-sf.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-ld-si.c  |  41 +
>  .../gcc.target/powerpc/pcrel-opt-ld-vector.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-df.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-di.c  |  37 +
>  .../gcc.target/powerpc/pcrel-opt-st-hi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-st-qi.c  |  42 +
>  .../gcc.target/powerpc/pcrel-opt-st-sf.c  |  36 +
>  .../gcc.target/powerpc/pcrel-opt-st-si.c  |  41 +
>  .../gcc.target/powerpc/pcrel-opt-st-vector.c  |  36 +
>  26 files changed, 2013 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/config/rs6000/pcrel-opt.c
>  create mode 100644 gcc/config/rs6000/pcrel-opt.md
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-inc-di.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-df.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pcrel-opt-ld-di.c
>  create 

Re: PowerPC: Allow C/C++ to change long double type on GLIBC 2.32.

2020-10-29 Thread will schmidt via Gcc-patches
On Thu, 2020-10-29 at 13:05 -0400, Michael Meissner wrote:
> On Mon, Oct 26, 2020 at 05:48:48PM -0500, will schmidt wrote:
> > On Thu, 2020-10-22 at 18:15 -0400, Michael Meissner via Gcc-patches 
> > wrote:
> > > PowerPC: Allow C/C++ to change long double type on GLIBC 2.32.
> > > 
> > > This is a new patch.  It turns off the warning about switching
> > > the long double
> > > type via compile line if the GLIBC is 2.32 or newer.  It only
> > > does this if the
> > > languages are C or C++, since those language libraries support
> > > switching the
> > > long double type.  Other languages like Fortran don't have any
> > > current support
> > > to provide both sets of interfaces to the library.
> > > 
> > > 2020-10-21  Michael Meissner  
> > > 
> > >   * config/rs6000/rs6000.c (rs6000_option_override_internal):
> > > Allow
> > >   long double type to be changed for C/C++ if glibc 2.32 or
> > > newer.
> > > ---
> > >  gcc/config/rs6000/rs6000.c | 10 --
> > >  1 file changed, 8 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/gcc/config/rs6000/rs6000.c
> > > b/gcc/config/rs6000/rs6000.c
> > > index 50039c0a53d..940c15f3265 100644
> > > --- a/gcc/config/rs6000/rs6000.c
> > > +++ b/gcc/config/rs6000/rs6000.c
> > > @@ -4158,10 +4158,16 @@ rs6000_option_override_internal (bool
> > > global_init_p)
> > > 
> > >if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT &&
> > > TARGET_LONG_DOUBLE_128)
> > >   {
> > > +   /* Determine if the user can change the default long double
> > > type at
> > > +  compilation time.  Only C and C++ support this, and you
> > > need GLIBC
> > > +  2.32 or newer.  Only issue one warning.  */
> > > static bool warned_change_long_double;
> > > -   if (!warned_change_long_double)
> > > +
> > > +   if (!warned_change_long_double
> > > +   && (!OPTION_GLIBC
> > > +   || (!lang_GNU_C () && !lang_GNU_CXX ())
> > > +   || ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR)
> > > < 2032))
> > >   {
> > > -   warned_change_long_double = true;
> > 
> > Does this need to be added back elsewhere? 
> 
> At the present time, we are not contemplating adding the full support
> to enable
> configuring GCC to use IEEE 128-bit long double in GCC 10 or
> earlier.  This may
> change depending on customer demands.
> 

My question was/is specific to the removal of the assignment

- warned_change_long_double = true;

I didn't see where that line or an equivalent was added back.  It's
either set elsewhere (ok), or no longer used.

Thanks,
-Will





Re: [RS6000] Don't be too clever with dg-do run and dg-do compile

2020-10-28 Thread will schmidt via Gcc-patches
On Wed, 2020-10-28 at 21:20 +1030, Alan Modra via Gcc-patches wrote:
> Otherwise some versions of dejagnu go ahead and run the vsx tests
> below when they should not.  To best cope with older dejagnu, put
> "run" before "compile", the idea being that if the second dg-do always
> wins then that won't cause fails.
> 
> The altivec tests also need -save-temps for the scan-assembler test to
> occur when vms_hw.

vmx_hw ? :)

> 
> Regression tested powerpc64le-linux and powerpc64-linux.  OK?
> 
>   * gcc.target/powerpc/vsx-load-element-extend-char.c: Put "dg-do run"
>   before "dg-do compile", and make them mutually exclusive.
>   * gcc.target/powerpc/vsx-load-element-extend-int.c: Likewise.
>   * gcc.target/powerpc/vsx-load-element-extend-longlong.c: Likewise.
>   * gcc.target/powerpc/vsx-load-element-extend-short.c: Likewise.
>   * gcc.target/powerpc/vsx-store-element-truncate-char.c: Likewise.
>   * gcc.target/powerpc/vsx-store-element-truncate-int.c: Likewise.
>   * gcc.target/powerpc/vsx-store-element-truncate-longlong.c: Likewise.
>   * gcc.target/powerpc/vsx-store-element-truncate-short.c: Likewise.
>   * gcc.target/powerpc/altivec-consts.c: Likewise, add -save-temps.
>   * gcc.target/powerpc/le-altivec-consts.c: Likewise.
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-consts.c 
> b/gcc/testsuite/gcc.target/powerpc/altivec-consts.c
> index d59f9b4cf1c..c68c68125d1 100644
> --- a/gcc/testsuite/gcc.target/powerpc/altivec-consts.c
> +++ b/gcc/testsuite/gcc.target/powerpc/altivec-consts.c
> @@ -1,7 +1,7 @@
>  /* { dg-do run { target vmx_hw } } */
> -/* { dg-do compile } */
> +/* { dg-do compile { target { ! vmx_hw } } } */
>  /* { dg-require-effective-target powerpc_altivec_ok } */
> -/* { dg-options "-maltivec -mabi=altivec -O2" } */
> +/* { dg-options "-maltivec -mabi=altivec -O2 -save-temps" } */
> 
>  /* Check that "easy" AltiVec constants are correctly synthesized.  */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/le-altivec-consts.c 
> b/gcc/testsuite/gcc.target/powerpc/le-altivec-consts.c
> index f48ef44e676..a1db5e92f87 100644
> --- a/gcc/testsuite/gcc.target/powerpc/le-altivec-consts.c
> +++ b/gcc/testsuite/gcc.target/powerpc/le-altivec-consts.c
> @@ -1,7 +1,7 @@
>  /* { dg-do run { target vmx_hw } } */
> -/* { dg-do compile } */
> +/* { dg-do compile { target { ! vmx_hw } } } */
>  /* { dg-require-effective-target powerpc_altivec_ok } */
> -/* { dg-options "-maltivec -mabi=altivec -O2" } */
> +/* { dg-options "-maltivec -mabi=altivec -O2 -save-temps" } */
> 
>  /* Check that "easy" AltiVec constants are correctly synthesized.  */
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-char.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-char.c
> index f386346e059..c23a9128680 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-char.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-char.c
> @@ -2,8 +2,9 @@
> Test of vec_xl_sext and vec_xl_zext (load into rightmost
> vector element and zero/sign extend). */
> 
> -/* { dg-do compile {target power10_ok} } */
> -/* { dg-do run {target power10_hw} } */
> +/* { dg-do run { target power10_hw } } */
> +/* { dg-do compile { target { ! power10_hw } } } */
> +/* { dg-require-effective-target power10_ok } */
>  /* { dg-require-effective-target int128 } */
>  /* { dg-options "-mdejagnu-cpu=power10 -O3 -save-temps" } */


Ok.   These are from some tests I recently committed,   I obviously
missed this combo (testing with older dejagnu).. I think I've updated
my dejagnu versions all over the place for other reasons.  Do you
consider this a non-typical corner case with older dejagnu, or should I
try to explicitly check for this in the future?

Similar/same changes below.  These changes all seem reasonable.

lgtm, 
thanks
-Will



> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
> index ea737466a58..c40e1a3a0f7 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-int.c
> @@ -2,8 +2,9 @@
> Test of vec_xl_sext and vec_xl_zext (load into rightmost
> vector element and zero/sign extend). */
> 
> -/* { dg-do compile {target power10_ok} } */
> -/* { dg-do run {target power10_hw} } */
> +/* { dg-do run { target power10_hw } } */
> +/* { dg-do compile { target { ! power10_hw } } } */
> +/* { dg-require-effective-target power10_ok } */
>  /* { dg-require-effective-target int128 } */
> 
>  /* Deliberately set optization to zero for this test to confirm
> diff --git 
> a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-longlong.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-longlong.c
> index cd155c2013d..405b4245f8e 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-load-element-extend-longlong.c
> +++ 

Re: PowerPC: Map q built-ins to *l instead of *f128 if IEEE 128-bit long double.

2020-10-27 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:08 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Map q built-ins to *l instead of *f128 if IEEE 128-bit long double.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> If we map nanq to nanf128 when long double is IEEE, it seems to lose the
> special signaling vs. non-signaling NAN support.  This patch maps the 
> functions
> to the long double version if long double is IEEE 128-bit.  If this patch
> is not applied, a few tests in the testsuite will start failing.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> gcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/rs6000-c.c (rs6000_cpu_cpp_builtins): If long
>   double is IEEE-128 map the nanq built-in functions to the long
>   double function, not the f128 function.

A bit long, but I think it's OK.

> ---
>  gcc/config/rs6000/rs6000-c.c | 31 ---
>  1 file changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index cc1e997524e..bee78fcbac4 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ b/gcc/config/rs6000/rs6000-c.c
> @@ -684,15 +684,32 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile)
>builtin_define ("__builtin_vsx_xvnmsubmsp=__builtin_vsx_xvnmsubsp");
>  }
> 
> -  /* Map the old _Float128 'q' builtins into the new 'f128' builtins.  */
> +  /* Map the old _Float128 'q' builtins into the new 'f128' builtins if long
> + double is IBM or 64-bit.
> +
> + However, if long double is IEEE 128-bit, map both sets of built-in
> + functions to the normal long double version.  This shows up in nansf128
> + vs. nanf128.  */
>if (TARGET_FLOAT128_TYPE)
>  {
> -  builtin_define ("__builtin_fabsq=__builtin_fabsf128");
> -  builtin_define ("__builtin_copysignq=__builtin_copysignf128");
> -  builtin_define ("__builtin_nanq=__builtin_nanf128");
> -  builtin_define ("__builtin_nansq=__builtin_nansf128");
> -  builtin_define ("__builtin_infq=__builtin_inff128");
> -  builtin_define ("__builtin_huge_valq=__builtin_huge_valf128");
> +  if (FLOAT128_IEEE_P (TFmode))
> + {
> +   builtin_define ("__builtin_fabsq=__builtin_fabsl");
> +   builtin_define ("__builtin_copysignq=__builtin_copysignl");
> +   builtin_define ("__builtin_nanq=__builtin_nanl");
> +   builtin_define ("__builtin_nansq=__builtin_nansl");
> +   builtin_define ("__builtin_infq=__builtin_infl");
> +   builtin_define ("__builtin_huge_valq=__builtin_huge_vall");
> + }
> +  else
> + {
> +   builtin_define ("__builtin_fabsq=__builtin_fabsf128");
> +   builtin_define ("__builtin_copysignq=__builtin_copysignf128");
> +   builtin_define ("__builtin_nanq=__builtin_nanf128");
> +   builtin_define ("__builtin_nansq=__builtin_nansf128");
> +   builtin_define ("__builtin_infq=__builtin_inff128");
> +   builtin_define ("__builtin_huge_valq=__builtin_huge_valf128");
> + }
>  }

ok
lgtm, 
thanks
-Will

> 
>/* Tell users they can use __builtin_bswap{16,64}.  */
> -- 
> 2.22.0
> 
> 



Re: PowerPC: Update long double IEEE 128-bit tests.

2020-10-27 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:07 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Update long double IEEE 128-bit tests.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch fixes 3 tests in the testsuite that fail if long double is set
> to IEEE 128-bit.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> gcc/testsuite/
> 2020-10-22  Michael Meissner  
> 
>   * c-c++-common/dfp/convert-bfp-11.c: If long double is IEEE
>   128-bit, skip the test.
>   * gcc.dg/nextafter-2.c: On PowerPC, if long double is IEEE
>   128-bit, include math.h to get the built-in mapped correctly.
>   * gcc.target/powerpc/pr70117.c: Add support for long double being
>   IEEE 128-bit.
> ---
>  gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c |  7 +++
>  gcc/testsuite/gcc.dg/nextafter-2.c  | 10 ++
>  gcc/testsuite/gcc.target/powerpc/pr70117.c  |  6 --
>  3 files changed, 21 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c 
> b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
> index 95c433d2c24..6ee0c1c6ae9 100644
> --- a/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
> +++ b/gcc/testsuite/c-c++-common/dfp/convert-bfp-11.c
> @@ -5,6 +5,7 @@
> Don't force 128-bit long doubles because runtime support depends
> on glibc.  */
> 
> +#include <float.h>
>  #include "convert.h"
> 
>  volatile _Decimal32 sd;
> @@ -39,6 +40,12 @@ main ()
>if (sizeof (long double) != 16)
>  return 0;
> 
> +  /* This test is written to test IBM extended double, which is a pair of
> + doubles.  If long double can hold a larger value than a double can, such
> + as when long double is IEEE 128-bit, just exit immediately.  */
> +  if (LDBL_MAX_10_EXP > DBL_MAX_10_EXP)
> +return 0;
> +
>convert_101 ();
>convert_102 ();
> 
> diff --git a/gcc/testsuite/gcc.dg/nextafter-2.c 
> b/gcc/testsuite/gcc.dg/nextafter-2.c
> index e51ae94be0c..64e9e3c485f 100644
> --- a/gcc/testsuite/gcc.dg/nextafter-2.c
> +++ b/gcc/testsuite/gcc.dg/nextafter-2.c
> @@ -13,4 +13,14 @@
>  #  define NO_LONG_DOUBLE 1
>  # endif
>  #endif
> +
> +#if defined(_ARCH_PPC) && defined(__LONG_DOUBLE_IEEE128__)
> +/* On PowerPC systems, long double uses either the IBM long double format, or
> +   IEEE 128-bit format.  The compiler switches the long double built-in
> +   function names and glibc switches the names when math.h is included.
> +   Because this test is run with -fno-builtin, include math.h so that the
> +   appropriate nextafter functions are called.  */


Great comment. :-)


> +#include <math.h>
> +#endif
> +
>  #include "nextafter-1.c"
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr70117.c 
> b/gcc/testsuite/gcc.target/powerpc/pr70117.c
> index 3bbd2c595e0..928efe39c7b 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr70117.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr70117.c
> @@ -9,9 +9,11 @@
> 128-bit floating point, because the type is not enabled on those
> systems.  */
>  #define LDOUBLE __ibm128
> +#define IBM128_MAX ((__ibm128) 1.79769313486231580793728971405301199e+308L)
> 
>  #elif defined(__LONG_DOUBLE_IBM128__)
>  #define LDOUBLE long double
> +#define IBM128_MAX LDBL_MAX
> 
>  #else
>  #error "long double must be either IBM 128-bit or IEEE 128-bit"
> @@ -75,10 +77,10 @@ main (void)
>if (__builtin_isnormal (ld))
>  __builtin_abort ();
> 
> -  ld = LDBL_MAX;
> +  ld = IBM128_MAX;
>if (!__builtin_isnormal (ld))
>  __builtin_abort ();
> -  ld = -LDBL_MAX;
> +  ld = -IBM128_MAX;
>if (!__builtin_isnormal (ld))
>  __builtin_abort ();
> 

ok

lgtm, 
thanks
-Will

> -- 
> 2.22.0
> 
> 



Re: PowerPC: Add __float128 conversions to/from Decimal

2020-10-27 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:06 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Add __float128 conversions to/from Decimal.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch adds the various decimal to/from IEEE 128-bit conversions.  I
> had to make some changes to the infrastructure, since that infrastructure
> assumed that there is a sprintf/scanf format modifier to convert floating
> point.  Instead, I used the str* conversion functions.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> libgcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/_dd_to_kf.c: New file.
>   * config/rs6000/_kf_to_dd.c: New file.
>   * config/rs6000/_kf_to_sd.c: New file.
>   * config/rs6000/_kf_to_td.c: New file.
>   * config/rs6000/_sd_to_kf.c: New file.
>   * config/rs6000/_td_to_kf.c: New file.
>   * config/rs6000/t-float128: Build __float128 conversions to and
>   from Decimal support functions.
ok

>   * dfp-bit.c: Add support for building the PowerPC _Float128
>   to/from Decimal conversion functions.
>   * dfp-bit.h: Likewise.

These files are not architecture-specific, so this also needs attention
from whoever has to approve the generic (non-rs6000) changes.  :-)


> ---
>  libgcc/config/rs6000/_dd_to_kf.c | 30 ++
>  libgcc/config/rs6000/_kf_to_dd.c | 30 ++
>  libgcc/config/rs6000/_kf_to_sd.c | 30 ++
>  libgcc/config/rs6000/_kf_to_td.c | 30 ++
>  libgcc/config/rs6000/_sd_to_kf.c | 30 ++
>  libgcc/config/rs6000/_td_to_kf.c | 30 ++
>  libgcc/config/rs6000/t-float128  | 20 -
>  libgcc/dfp-bit.c | 10 +++--
>  libgcc/dfp-bit.h | 37 +---
>  9 files changed, 241 insertions(+), 6 deletions(-)
>  create mode 100644 libgcc/config/rs6000/_dd_to_kf.c
>  create mode 100644 libgcc/config/rs6000/_kf_to_dd.c
>  create mode 100644 libgcc/config/rs6000/_kf_to_sd.c
>  create mode 100644 libgcc/config/rs6000/_kf_to_td.c
>  create mode 100644 libgcc/config/rs6000/_sd_to_kf.c
>  create mode 100644 libgcc/config/rs6000/_td_to_kf.c
> 
> diff --git a/libgcc/config/rs6000/_dd_to_kf.c 
> b/libgcc/config/rs6000/_dd_to_kf.c
> new file mode 100644
> index 000..081415fd393
> --- /dev/null
> +++ b/libgcc/config/rs6000/_dd_to_kf.c
> @@ -0,0 +1,30 @@
> +/* Copyright (C) 1989-2020 Free Software Foundation, Inc.

Should that (new file) have the 1989 start date (since it is presumably
based on an existing file), or start with 2020?
Same with the others here.

> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify it under
> +the terms of the GNU General Public License as published by the Free
> +Software Foundation; either version 3, or (at your option) any later
> +version.
> +
> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
> +WARRANTY; without even the implied warranty of MERCHANTABILITY or
> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
> +for more details.
> +
> +Under Section 7 of GPL version 3, you are granted additional
> +permissions described in the GCC Runtime Library Exception, version
> +3.1, as published by the Free Software Foundation.
> +
> +You should have received a copy of the GNU General Public License and
> +a copy of the GCC Runtime Library Exception along with this program;
> +see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> +<http://www.gnu.org/licenses/>.  */
> +
> +/* Decimal64 -> _Float128 conversion.  */
> +#define FINE_GRAINED_LIBRARIES   1
> +#define L_dd_to_kf   1
> +#define WIDTH64
> +
> +/* Use dfp-bit.c to do the real work.  */
> +#include "dfp-bit.c"
> diff --git a/libgcc/config/rs6000/_kf_to_dd.c 
> b/libgcc/config/rs6000/_kf_to_dd.c
> new file mode 100644
> index 000..09a62cbe629
> --- /dev/null
> +++ b/libgcc/config/rs6000/_kf_to_dd.c
> @@ -0,0 +1,30 @@
> +/* Copyright (C) 1989-2020 Free Software Foundation, Inc.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can 

Re: PowerPC: Update IEEE 128-bit built-ins for long double is IEEE 128-bit.

2020-10-27 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:09 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Update IEEE 128-bit built-ins for long double is IEEE 128-bit.

"for when .."

> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch adds long double variants of the power10 __float128 built-in
> functions.  This is needed when long double uses IEEE 128-bit because
> __float128 uses TFmode in this case instead of KFmode.  If this patch is not
> applied, these built-in functions can't be used when long double is IEEE
> 128-bit.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 



> gcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/rs6000-call.c (altivec_overloaded_builtins): Add
>   built-in functions for long double built-ins that use IEEE
>   128-bit.
>   (rs6000_expand_builtin): Change the KF IEEE 128-bit comparison
>   insns to TF if long double is IEEE 128-bit.
>   * config/rs6000/rs6000-builtin.def (scalar_extract_exptf): Add
>   support for long double being IEEE 128-bit built-in functions.
>   (scalar_extract_sigtf): Likewise.
>   (scalar_test_neg_tf): Likewise.
>   (scalar_insert_exp_tf): Likewise.
>   (scalar_insert_exp_tfp): Likewise.
>   (scalar_cmp_exp_tf_gt): Likewise.
>   (scalar_cmp_exp_tf_lt): Likewise.
>   (scalar_cmp_exp_tf_eq): Likewise.
>   (scalar_cmp_exp_tf_unordered): Likewise.
>   (scalar_test_data_class_tf): Likewise.
> ---
>  gcc/config/rs6000/rs6000-builtin.def | 11 
>  gcc/config/rs6000/rs6000-call.c  | 40 
>  2 files changed, 51 insertions(+)
> 
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index 3eb55f0ae43..6f5685bf697 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2401,8 +2401,11 @@ BU_P9V_64BIT_VSX_1 (VSESDP,"scalar_extract_sig",   
> CONST,  xsxsigdp)
> 
>  BU_FLOAT128_HW_VSX_1 (VSEEQP,"scalar_extract_expq",  CONST,  
> xsxexpqp_kf)
>  BU_FLOAT128_HW_VSX_1 (VSESQP,"scalar_extract_sigq",  CONST,  
> xsxsigqp_kf)
> +BU_FLOAT128_HW_VSX_1 (VSEETF,"scalar_extract_exptf", CONST,  
> xsxexpqp_tf)
> +BU_FLOAT128_HW_VSX_1 (VSESTF,"scalar_extract_sigtf", CONST,  
> xsxsigqp_tf)
> 
>  BU_FLOAT128_HW_VSX_1 (VSTDCNQP, "scalar_test_neg_qp",CONST,  
> xststdcnegqp_kf)
> +BU_FLOAT128_HW_VSX_1 (VSTDCNTF, "scalar_test_neg_tf",CONST,  
> xststdcnegqp_tf)
>  BU_P9V_VSX_1 (VSTDCNDP,  "scalar_test_neg_dp",   CONST,  xststdcnegdp)
>  BU_P9V_VSX_1 (VSTDCNSP,  "scalar_test_neg_sp",   CONST,  xststdcnegsp)
> 
> @@ -2420,6 +2423,8 @@ BU_P9V_64BIT_VSX_2 (VSIEDPF,"scalar_insert_exp_dp", 
> CONST,  xsiexpdpf)
> 
>  BU_FLOAT128_HW_VSX_2 (VSIEQP,"scalar_insert_exp_q",  CONST,  
> xsiexpqp_kf)
>  BU_FLOAT128_HW_VSX_2 (VSIEQPF,   "scalar_insert_exp_qp", CONST,  
> xsiexpqpf_kf)
> +BU_FLOAT128_HW_VSX_2 (VSIETF,"scalar_insert_exp_tf", CONST,  
> xsiexpqp_tf)
> +BU_FLOAT128_HW_VSX_2 (VSIETFF,   "scalar_insert_exp_tfp", CONST, 
> xsiexpqpf_tf)

OK if it's OK, but the pattern catches my eye.  Should that be VSIETFP?
(or named "scalar_insert_exp_tff")?


> 
>  BU_P9V_VSX_2 (VSCEDPGT,  "scalar_cmp_exp_dp_gt", CONST,  xscmpexpdp_gt)
>  BU_P9V_VSX_2 (VSCEDPLT,  "scalar_cmp_exp_dp_lt", CONST,  xscmpexpdp_lt)
> @@ -2431,7 +2436,13 @@ BU_P9V_VSX_2 (VSCEQPLT,"scalar_cmp_exp_qp_lt", 
> CONST,  xscmpexpqp_lt_kf)
>  BU_P9V_VSX_2 (VSCEQPEQ,  "scalar_cmp_exp_qp_eq", CONST,  
> xscmpexpqp_eq_kf)
>  BU_P9V_VSX_2 (VSCEQPUO,  "scalar_cmp_exp_qp_unordered",  CONST,  
> xscmpexpqp_unordered_kf)
> 
> +BU_P9V_VSX_2 (VSCETFGT,  "scalar_cmp_exp_tf_gt", CONST,  
> xscmpexpqp_gt_tf)
> +BU_P9V_VSX_2 (VSCETFLT,  "scalar_cmp_exp_tf_lt", CONST,  
> xscmpexpqp_lt_tf)
> +BU_P9V_VSX_2 (VSCETFEQ,  "scalar_cmp_exp_tf_eq", CONST,  
> xscmpexpqp_eq_tf)
> +BU_P9V_VSX_2 (VSCETFUO,  "scalar_cmp_exp_tf_unordered", CONST, 
> xscmpexpqp_unordered_tf)
> +
>  BU_FLOAT128_HW_VSX_2 (VSTDCQP, "scalar_test_data_class_qp",  CONST,  
> xststdcqp_kf)
> +BU_FLOAT128_HW_VSX_2 (VSTDCTF, "scalar_test_data_class_tf", 

Re: PowerPC: Use __builtin_pack_ieee128 if long double is IEEE 128-bit.

2020-10-27 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:10 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Use __builtin_pack_ieee128 if long double is IEEE 128-bit.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch changes the __ibm128 emulator to use __builtin_pack_ieee128
> instead of __builtin_pack_longdouble if long double is IEEE 128-bit, and
> we need to use the __ibm128 type.  The code will run without this patch,
> but this patch slightly optimizes it better.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.

good. :-)  A quick search of the GCC Bugzilla shows there is an existing
PR 67531 that includes ieee rounding support for powerpc long double. 
Does this (partially?) address that? 
  
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67531


> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> libgcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/ibm-ldouble.c (pack_ldouble): Use
>   __builtin_pack_ieee128 if long double is IEEE 128-bit.
> ---
>  libgcc/config/rs6000/ibm-ldouble.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/libgcc/config/rs6000/ibm-ldouble.c 
> b/libgcc/config/rs6000/ibm-ldouble.c
> index dd2a02373f2..767fdd72683 100644
> --- a/libgcc/config/rs6000/ibm-ldouble.c
> +++ b/libgcc/config/rs6000/ibm-ldouble.c
> @@ -102,9 +102,17 @@ __asm__ (".symver __gcc_qadd,_xlqadd@GCC_3.4\n\t"
>  static inline IBM128_TYPE
>  pack_ldouble (double dh, double dl)
>  {
> +  /* If we are building on a non-VSX system, the __ibm128 type is not 
> defined.
> + This means we can't always use __builtin_pack_ibm128.  Instead, we use
> + __builtin_pack_longdouble if long double uses the IBM extended double
> + 128-bit format, and use the explicit __builtin_pack_ibm128 if long 
> double
> + is IEEE 128-bit.  */
>  #if defined (__LONG_DOUBLE_128__) && defined (__LONG_DOUBLE_IBM128__)
> \
>  && !(defined (_SOFT_FLOAT) || defined (__NO_FPRS__))
>return __builtin_pack_longdouble (dh, dl);
> +#elif defined (__LONG_DOUBLE_128__) && defined (__LONG_DOUBLE_IEEE128__) \
> +&& !(defined (_SOFT_FLOAT) || defined (__NO_FPRS__))
> +  return __builtin_pack_ibm128 (dh, dl);

ok

lgtm,
thanks
-Will


>  #else
>union
>{
> -- 
> 2.22.0
> 
> 



Re: PowerPC: Update __float128 and __ibm128 error messages.

2020-10-27 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:11 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Update __float128 and __ibm128 error messages.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch attempts to make the error messages for intermixing IEEE 128-bit
> floating point with IBM 128-bit extended double types to be clearer if the 
> long
> double type uses the IEEE 128-bit format.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> gcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/rs6000.c (rs6000_invalid_binary_op): Update error
>   messages about mixing IBM long double and IEEE 128-bit.
> 
> gcc/testsuite/
> 2020-10-22  Michael Meissner  
> 
>   * gcc.target/powerpc/bfp/scalar-extract-exp-4.c: Update failure
>   messages.
>   * gcc.target/powerpc/bfp/scalar-extract-sig-4.c: Update failure
>   messages.
>   * gcc.target/powerpc/bfp/scalar-test-data-class-11.c: Update
>   failure messages.
>   * gcc.target/powerpc/bfp/scalar-test-neg-5.c: Update failure
>   messages.
>   * gcc.target/powerpc/float128-mix-2.c: New test.
>   * gcc.target/powerpc/float128-mix-3.c: New test.
>   * gcc.target/powerpc/float128-mix.c: Update failure messages.
> ---
>  gcc/config/rs6000/rs6000.c| 20 ---
>  .../powerpc/bfp/scalar-extract-exp-4.c|  4 +---
>  .../powerpc/bfp/scalar-extract-sig-4.c|  2 +-
>  .../powerpc/bfp/scalar-test-data-class-11.c   |  2 +-
>  .../powerpc/bfp/scalar-test-neg-5.c   |  2 +-
>  .../gcc.target/powerpc/float128-mix-2.c   | 17 
>  .../gcc.target/powerpc/float128-mix-3.c   | 17 
>  .../gcc.target/powerpc/float128-mix.c | 19 ++
>  8 files changed, 53 insertions(+), 30 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-mix-2.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/float128-mix-3.c
> 

ok

> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 8c2544ee88d..50039c0a53d 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -14386,22 +14386,10 @@ rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
> 
>if (!TARGET_FLOAT128_CVT)
>  {
> -  if ((mode1 == KFmode && mode2 == IFmode)
> -   || (mode1 == IFmode && mode2 == KFmode))
> - return N_("__float128 and __ibm128 cannot be used in the same "
> -   "expression");
> -
> -  if (TARGET_IEEEQUAD
> -   && ((mode1 == IFmode && mode2 == TFmode)
> -   || (mode1 == TFmode && mode2 == IFmode)))
> - return N_("__ibm128 and long double cannot be used in the same "
> -   "expression");
> -
> -  if (!TARGET_IEEEQUAD
> -   && ((mode1 == KFmode && mode2 == TFmode)
> -   || (mode1 == TFmode && mode2 == KFmode)))
> - return N_("__float128 and long double cannot be used in the same "
> -   "expression");
> +  if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
> +   || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
> + return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
> +   "point types");

ok

>  }
> 
>return NULL;
> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-4.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-4.c
> index 850ff620490..2065a287bb3 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-4.c
> +++ b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-exp-4.c
> @@ -11,7 +11,5 @@ get_exponent (__ieee128 *p)
>  {
>__ieee128 source = *p;
> 
> -  return __builtin_vec_scalar_extract_exp (source); /* { dg-error 
> "'__builtin_vsx_scalar_extract_expq' requires" } */
> +  return __builtin_vec_scalar_extract_exp (source); /* { dg-error 
> "'__builtin_vsx_scalar_extract_exp.*' requires" } */
>  }
> -
> -
ok


> diff --git a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-4.c 
> b/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-4.c
> index 32a53c6fffd..37bc8332961 100644
> --- a/gcc/testsuite/gcc.target/powerpc/bfp/scalar-extract-sig-4.c
> +++ 

Re: PowerPC: Use __float128 instead of __ieee128 in tests.

2020-10-26 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:12 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Use __float128 instead of __ieee128 in tests.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> Two of the tests used the __ieee128 keyword instead of __float128.  This
> patch changes those cases to use the official keyword.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> gcc/testsuite/
> 2020-10-22  Michael Meissner  
> 
>   * gcc.target/powerpc/float128-cmp2-runnable.c: Use __float128
>   keyword instead of __ieee128.
>   * gcc.target/powerpc/pr92796.c: Use __float128 keyword instead of
>   __ieee128.
> ---
>  gcc/testsuite/gcc.target/powerpc/float128-cmp2-runnable.c | 2 +-
>  gcc/testsuite/gcc.target/powerpc/pr92796.c| 8 
>  2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmp2-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/float128-cmp2-runnable.c
> index d376a3ca68e..16b70877355 100644
> --- a/gcc/testsuite/gcc.target/powerpc/float128-cmp2-runnable.c
> +++ b/gcc/testsuite/gcc.target/powerpc/float128-cmp2-runnable.c
> @@ -18,7 +18,7 @@ int main(void)
>  {
>int result;
>double a_dble, b_dble;
> -  __ieee128 a_ieee128, b_ieee128;
> +  __float128 a_ieee128, b_ieee128;
> 
>a_dble = 3.10;
>b_dble = 3.10;
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92796.c 
> b/gcc/testsuite/gcc.target/powerpc/pr92796.c
> index 1e671e175de..f2c6b8b7f5c 100644
> --- a/gcc/testsuite/gcc.target/powerpc/pr92796.c
> +++ b/gcc/testsuite/gcc.target/powerpc/pr92796.c
> @@ -4,14 +4,14 @@
> 
>  typedef union
>  {
> -  __ieee128 a;
> +  __float128 a;
>int b;
>  } c;
> 
> -__ieee128
> -d (__ieee128 x)
> +__float128
> +d (__float128 x)
>  {
> -  __ieee128 g;
> +  __float128 g;
>c h;
>h.a = x;
>g = h.b & 5;
> -- 
> 2.22.0
> 

lgtm, 
thanks
-Will


> 



Re: PowerPC: Add -mno-gnu-attributes to ibm-ldouble.o

2020-10-26 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:05 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Add -mno-gnu-attributes to ibm-ldouble.o.
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch is split off from the patch adding __float128 <-> Decimal
> conversions.  This patch is needed to prevent the linker from issuing a
> warning when the long double format is IEEE 128-bit.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> libgcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/t-float128: Compile ibm-ldouble.o with
>   -mno-gnu-attributes.
> ---
>  libgcc/config/rs6000/t-float128 | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/libgcc/config/rs6000/t-float128 b/libgcc/config/rs6000/t-float128
> index d5413445189..c2cb3e2b705 100644
> --- a/libgcc/config/rs6000/t-float128
> +++ b/libgcc/config/rs6000/t-float128
> @@ -69,6 +69,11 @@ $(fp128_ppc_obj): INTERNAL_CFLAGS += $(FP128_CFLAGS_SW)
>  $(fp128_obj)  : $(fp128_includes)
>  $(fp128_obj)  : $(srcdir)/config/rs6000/quad-float128.h
> 
> +
> +# Turn off gnu attributes to allow using the IBM extended double support for
> +# __ibm128 if long double uses the IEEE 128-bit representation.
> +ibm-ldouble$(objext) : INTERNAL_CFLAGS += -mno-gnu-attribute
> +

I think from context you can drop the "if long double uses the IEEE
128-bit representation." portion of that comment. 

(Though this corrects
a condition based on the configury, etc, there is no conditional logic
here..)



>  $(fp128_softfp_src) : $(srcdir)/soft-fp/$(subst -sw,,$(subst kf,tf,$@)) 
> $(fp128_dep)
>   @src="$(srcdir)/soft-fp/$(subst -sw,,$(subst kf,tf,$@))"; \
>   echo "Create $@"; \
> -- 
> 2.22.0
> 
> 



Re: PowerPC: Allow C/C++ to change long double type on GLIBC 2.32.

2020-10-26 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:15 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Allow C/C++ to change long double type on GLIBC 2.32.
> 
> This is a new patch.  It turns off the warning about switching the long double
> type via compile line if the GLIBC is 2.32 or newer.  It only does this if the
> languages are C or C++, since those language libraries support switching the
> long double type.  Other languages like Fortran don't have any current support
> to provide both sets of interfaces to the library.
> 
> 2020-10-21  Michael Meissner  
> 
>   * config/rs6000/rs6000.c (rs6000_option_override_internal): Allow
>   long double type to be changed for C/C++ if glibc 2.32 or newer.
> ---
>  gcc/config/rs6000/rs6000.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 50039c0a53d..940c15f3265 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4158,10 +4158,16 @@ rs6000_option_override_internal (bool global_init_p)
> 
>if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && 
> TARGET_LONG_DOUBLE_128)
>   {
> +   /* Determine if the user can change the default long double type at
> +  compilation time.  Only C and C++ support this, and you need GLIBC
> +  2.32 or newer.  Only issue one warning.  */

> static bool warned_change_long_double;
> -   if (!warned_change_long_double)
> +
> +   if (!warned_change_long_double
> +   && (!OPTION_GLIBC
> +   || (!lang_GNU_C () && !lang_GNU_CXX ())
> +   || ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) < 2032))
>   {
> -   warned_change_long_double = true;

Does this need to be added back elsewhere? 



> if (TARGET_IEEEQUAD)
>   warning (OPT_Wpsabi, "Using IEEE extended precision "
>"%");
> -- 
> 2.22.0
> 
> 



Re: PowerPC: Map IEEE 128-bit long double built-in functions

2020-10-26 Thread will schmidt via Gcc-patches
On Thu, 2020-10-22 at 18:03 -0400, Michael Meissner via Gcc-patches wrote:
> PowerPC: Map IEEE 128-bit long double built-in functions
> 
> This patch is revised from the first and second versions of the patch posted.
> It now uses the names that are not in the user's namespace (i.e. __sinieee128
> instead of sinf128) that Joseph Myers suggested.
> 
> In addition, I added the changes suggested by Segher the last time this patch
> was submitted (changing where the default is, fixing the scalbl built-in name,
> using strlen and xaprintf).
> 
> I have split all of these patches into separate patches to hopefully get them
> into the tree.
> 
> This patch goes through the built-in functions and changes the name of the
> math, scanf, and printf built-in functions to use the functions that GLIBC
> provides when long double uses the IEEE 128-bit representation.
> 
> In addition, changing the name in GCC allows the Fortran compiler to
> automatically use the correct name.
> 
> To map the math functions, typically this patch changes <name>l to
> __<name>ieee128.  However there are some exceptions that are handled with this
> patch.
> 
> To map the printf functions, <name> is mapped to __<name>ieee128.
> 
> To map the scanf functions, <name> is mapped to __isoc99_<name>ieee128.
> 
> I have tested this patch with bootstrap builds on a little endian power9 
> system
> running Linux.  With the other patches, I have built two full bootstrap builds
> using this patch and the patches after this patch.  One build used the current
> default for long double (IBM extended double) and the other build switched the
> default to IEEE 128-bit.  I used the Advance Toolchain AT 14.0 compiler as the
> library used by this compiler.  There are no regressions between the tests.
> There are 3 fortran benchmarks (ieee/large_2.f90, default_format_2.f90, and
> default_format_denormal_2.f90) that now pass.
> 
> Can I install this into the trunk?
> 
> We have gotten some requests to back port these changes to GCC 10.x.  At the
> moment, I am not planning to do the back port, but I may need to in the 
> future.
> 
> gcc/
> 2020-10-22  Michael Meissner  
> 
>   * config/rs6000/rs6000.c (rs6000_mangle_decl_assembler_name): Add
>   support for mapping built-in function names for long double
>   built-in functions if long double is IEEE 128-bit.

possibly redundant "built-in functions" in there, but OK. 

> 
> gcc/testsuite/
> 2020-10-22  Michael Meissner  
> 
>   * gcc.target/powerpc/float128-longdouble-math.c: New test.
>   * gcc.target/powerpc/float128-longdouble-stdio.c: New test.
>   * gcc.target/powerpc/float128-math.c: Adjust test for new name
>   being generated.
> ---
>  gcc/config/rs6000/rs6000.c| 135 -
>  .../powerpc/float128-longdouble-math.c| 567 ++
>  .../powerpc/float128-longdouble-stdio.c   |  37 ++
>  .../gcc.target/powerpc/float128-math.c|   6 +-
>  4 files changed, 710 insertions(+), 35 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/float128-longdouble-math.c
>  create mode 100644 
> gcc/testsuite/gcc.target/powerpc/float128-longdouble-stdio.c
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 141684e3157..8c2544ee88d 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -26893,56 +26893,127 @@ rs6000_globalize_decl_name (FILE * stream, tree 
> decl)
> library before you can switch the real*16 type at compile time.
> 
> We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name.  We
> -   only do this if the default is that long double is IBM extended double, 
> and
> -   the user asked for IEEE 128-bit.  */
> +   only do this transformation if the __float128 type is enabled.  This
> +   prevents us from doing the transformation on older 32-bit ports that might
> +   have enabled using IEEE 128-bit floating point as the default long double
> +   type.  */
> 

ok


>  static tree
>  rs6000_mangle_decl_assembler_name (tree decl, tree id)
>  {
> -  if (!TARGET_IEEEQUAD_DEFAULT && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
> -  && TREE_CODE (decl) == FUNCTION_DECL && DECL_IS_BUILTIN (decl) )
> +  if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
> +  && TREE_CODE (decl) == FUNCTION_DECL
> +  && fndecl_built_in_p (decl, BUILT_IN_NORMAL))
>  {
>size_t len = IDENTIFIER_LENGTH (id);
>const char *name = IDENTIFIER_POINTER (id);
> +  char *newname = NULL;
> 
> -  if (name[len - 1] == 'l')
> +  /* See if it is one of the built-in functions with an unusual name.  */
> +  switch (DECL_FUNCTION_CODE (decl))
>   {
> -   bool uses_ieee128_p = false;
> -   tree type = TREE_TYPE (decl);
> -   machine_mode ret_mode = TYPE_MODE (type);
> + case BUILT_IN_DREML:
> +   newname = xstrdup ("__remainderieee128");
> +   break;
> 
> -   /* See if the function returns a IEEE 128-bit floating point type or
> - 

[PATCH, rs6000] improve vec_ctf invalid parameter handling. (pr91903)

2020-10-26 Thread will schmidt via Gcc-patches
[PATCH, rs6000] improve vec_ctf invalid parameter handling.

Hi,
  Per PR91903, GCC ICEs when we attempt to pass a variable
(or out of range value) into the vec_ctf() builtin.  Per
investigation, the parameter checking exists for this
builtin with the int types, but was missing for
the long long types.

This patch adds the missing CODE_FOR_* entries to
rs6000_expand_binop_builtin to cover that scenario.
This patch also updates some existing tests to remove
calls to vec_ctf() and vec_cts() that contain negative
values.

Regtested clean on power7, power8, power9 Linux targets.

OK for trunk?

Thanks,
-Will

PR target/91903

2020-10-26  Will Schmidt  

gcc/ChangeLog:
* config/rs6000/rs6000-call.c (rs6000_expand_binop_builtin): Add
clauses for CODE_FOR_vsx_xvcvuxddp_scale and
CODE_FOR_vsx_xvcvsxddp_scale to the parameter checking code.

gcc/testsuite/ChangeLog:
* testsuite/gcc.target/powerpc/pr91903.c: New test.
* testsuite/gcc.target/powerpc/builtins-1.fold.h: Update.
* testsuite/gcc.target/powerpc/builtins-2.c: Update.

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b044778a7ae4..eb7e007e68d3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9447,11 +9447,13 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree 
exp, rtx target)
}
 }
   else if (icode == CODE_FOR_altivec_vcfux
   || icode == CODE_FOR_altivec_vcfsx
   || icode == CODE_FOR_altivec_vctsxs
-  || icode == CODE_FOR_altivec_vctuxs)
+  || icode == CODE_FOR_altivec_vctuxs
+  || icode == CODE_FOR_vsx_xvcvuxddp_scale
+  || icode == CODE_FOR_vsx_xvcvsxddp_scale)
 {
   /* Only allow 5-bit unsigned literals.  */
   STRIP_NOPS (arg1);
   if (TREE_CODE (arg1) != INTEGER_CST
  || TREE_INT_CST_LOW (arg1) & ~0x1f)
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h 
b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
index 8bc5f5e43366..42d552295e3e 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1.fold.h
@@ -212,14 +212,14 @@ int main ()
   extern vector unsigned long long u9; u9 = vec_mergeo (u3, u4);
 
   extern vector long long l8; l8 = vec_mul (l3, l4);
   extern vector unsigned long long u6; u6 = vec_mul (u3, u4);
 
-  extern vector double dh; dh = vec_ctf (la, -2);
+  extern vector double dh; dh = vec_ctf (la, 2);
   extern vector double di; di = vec_ctf (ua, 2);
   extern vector int sz; sz = vec_cts (fa, 0x1F);
-  extern vector long long l9; l9 = vec_cts (dh, -2);
+  extern vector long long l9; l9 = vec_cts (dh, 2);
   extern vector unsigned long long u7; u7 = vec_ctu (di, 2);
   extern vector unsigned int usz; usz = vec_ctu (fa, 0x1F);
 
   extern vector float f1; f1 = vec_mergee (fa, fb);
   extern vector float f2; f2 = vec_mergeo (fa, fb);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-2.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-2.c
index 2aa23a377992..30acae47faff 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-2.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-2.c
@@ -40,16 +40,16 @@ int main ()
 
   if (se[0] != 27L || se[1] != 27L || sf[0] != -14L || sf[1] != -14L
   || ue[0] != 27L || ue[1] != 27L || uf[0] != 14L || uf[1] != 14L)
 abort ();
 
-  vector double da = vec_ctf (sa, -2);
+  vector double da = vec_ctf (sa, 2);
   vector double db = vec_ctf (ua, 2);
-  vector long long sg = vec_cts (da, -2);
+  vector long long sg = vec_cts (da, 2);
   vector unsigned long long ug = vec_ctu (db, 2);
 
-  if (da[0] != 108.0 || da[1] != -56.0 || db[0] != 6.75 || db[1] != 3.5
+  if (da[0] != 6.75 || da[1] != -3.5 || db[0] != 6.75 || db[1] != 3.5
   || sg[0] != 27L || sg[1] != -14L || ug[0] != 27L || ug[1] != 14L)
 abort ();
 
   vector float fa = vec_ctf (inta, 5);
   if (fa[0] != 0.843750 || fa[1] != -0.031250 || fa[2] != 0.125000 || fa[3] != 
0.281250)
diff --git a/gcc/testsuite/gcc.target/powerpc/pr91903.c 
b/gcc/testsuite/gcc.target/powerpc/pr91903.c
new file mode 100644
index ..f0792117a88f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr91903.c
@@ -0,0 +1,74 @@
+/* { dg-do compile */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-options "-mdejagnu-cpu=power8" } */
+
+#include 
+#include 
+#include 
+
+vector double retd;
+vector float retf;
+vector signed int retsi;
+
+void test_int(vector signed int a, const int b)
+{
+   retf = vec_ctf(a,b); /* { dg-error "argument 2 must be a 5-bit unsigned 
literal" } */
+   retf = vec_ctf(a,-1); /* { dg-error "argument 2 must be a 5-bit 
unsigned literal" } */
+   retf = vec_ctf(a,-31); /* { dg-error "argument 2 must be a 5-bit 
unsigned literal" } */
+   retf = vec_ctf(a,-32); /* { dg-error "argument 2 must be a 5-bit 
unsigned literal" } */
+   retf = vec_ctf(a,1);
+   retf = vec_ctf(a,31);
+   retf = 

[PATCH 2/2, rs6000, V2] VSX load/store rightmost element operations

2020-10-20 Thread will schmidt via Gcc-patches
[PATCH 2/2, rs6000, v2] VSX load/store rightmost element operations

Hi,
This adds support for the VSX load/store rightmost element operations.
This includes the instructions lxvrbx, lxvrhx, lxvrwx, lxvrdx,
stxvrbx, stxvrhx, stxvrwx, stxvrdx; And the builtins
vec_xl_sext() /* vector load sign extend */
vec_xl_zext() /* vector load zero extend */
vec_xst_trunc() /* vector store truncate */.

Testcase results show that the instructions added with this patch show
up at low/no optimization (-O0), with a number of those being replaced
with other load and store instructions at higher optimization levels.
I've kept those tests at -O0 to confirm these newly added instructions
are generated.

[v2] Refreshed per review comments.
Comments cleaned up, indentation corrected, dg-* stanzas updated.

Regtested OK for Linux on power8,power9 targets.  Sniff-regtested OK on
power10 simulator.
OK for trunk?

Thanks,
-Will

gcc/ChangeLog:
* config/rs6000/altivec.h (vec_xl_zext, vec_xl_sext, vec_xst_trunc): New
defines.
* config/rs6000/rs6000-builtin.def (BU_P10V_OVERLOAD_X): New builtin macro.
(BU_P10V_AV_X): New builtin macro.
(se_lxvrbx, se_lxvrhx, se_lxvrwx, se_lxvrdx): Define internal names for
load and sign extend vector element.
(ze_lxvrbx, ze_lxvrhx, ze_lxvrwx, ze_lxvrdx): Define internal names for
load and zero extend vector element.
(tr_stxvrbx, tr_stxvrhx, tr_stxvrwx, tr_stxvrdx): Define internal names
for truncate and store vector element.
(se_lxvrx, ze_lxvrx, tr_stxvrx): Define internal names for overloaded
load/store rightmost element.
* config/rs6000/rs6000-call.c (altivec_builtin_types): Define the internal
monomorphs P10_BUILTIN_SE_LXVRBX, P10_BUILTIN_SE_LXVRHX,
P10_BUILTIN_SE_LXVRWX, P10_BUILTIN_SE_LXVRDX,
P10_BUILTIN_ZE_LXVRBX, P10_BUILTIN_ZE_LXVRHX, P10_BUILTIN_ZE_LXVRWX,
P10_BUILTIN_ZE_LXVRDX,
P10_BUILTIN_TR_STXVRBX, P10_BUILTIN_TR_STXVRHX, P10_BUILTIN_TR_STXVRWX,
P10_BUILTIN_TR_STXVRDX,
(altivec_expand_lxvr_builtin): New expansion for load element builtins.
(altivec_expand_stv_builtin): Update to support truncate and store builtins.
(altivec_expand_builtin): Add clauses for the load/store rightmost builtins.
(altivec_init_builtins): Add def_builtin entries for
__builtin_altivec_se_lxvrbx, __builtin_altivec_se_lxvrhx,
__builtin_altivec_se_lxvrwx, __builtin_altivec_se_lxvrdx,
__builtin_altivec_ze_lxvrbx, __builtin_altivec_ze_lxvrhx,
__builtin_altivec_ze_lxvrwx, __builtin_altivec_ze_lxvrdx,
__builtin_altivec_tr_stxvrbx, __builtin_altivec_tr_stxvrhx,
__builtin_altivec_tr_stxvrwx, __builtin_altivec_tr_stxvrdx,
__builtin_vec_se_lxvrx, __builtin_vec_ze_lxvrx, __builtin_vec_tr_stxvrx.
* config/rs6000/vsx.md (vsx_lxvrx, vsx_stxvrx, vsx_stxvrx):
New define_insn entries.
* gcc/doc/extend.texi:  Add documentation for vec_xl_sext, vec_xl_zext,
and vec_xst_trunc.

gcc/testsuite/ChangeLog:
* gcc.target/powerpc/vsx-load-element-extend-char.c: New test.
* gcc.target/powerpc/vsx-load-element-extend-int.c: New test.
* gcc.target/powerpc/vsx-load-element-extend-longlong.c: New test.
* gcc.target/powerpc/vsx-load-element-extend-short.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-char.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-int.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-longlong.c: New test.
* gcc.target/powerpc/vsx-store-element-truncate-short.c: New test.

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 8a2dcda01442..df10a8c498dd 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -234,10 +234,13 @@
 #define vec_lde __builtin_vec_lde
 #define vec_ldl __builtin_vec_ldl
 #define vec_lvebx __builtin_vec_lvebx
 #define vec_lvehx __builtin_vec_lvehx
 #define vec_lvewx __builtin_vec_lvewx
+#define vec_xl_zext __builtin_vec_ze_lxvrx
+#define vec_xl_sext __builtin_vec_se_lxvrx
+#define vec_xst_trunc __builtin_vec_tr_stxvrx
 #define vec_neg __builtin_vec_neg
 #define vec_pmsum_be __builtin_vec_vpmsum
 #define vec_shasigma_be __builtin_crypto_vshasigma
 /* Cell only intrinsics.  */
 #ifdef __PPU__
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 3eb55f0ae434..5b05da87f4bf 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1143,10 +1143,18 @@
(RS6000_BTC_ ## ATTR/* ATTR */  \
 | RS6000_BTC_BINARY),  \
CODE_FOR_ ## ICODE) /* ICODE */
 #endif
 
+#define BU_P10V_OVERLOAD_X(ENUM, NAME) \
+  RS6000_BUILTIN_X (P10_BUILTIN_VEC_ ## ENUM,  /* ENUM */  \
+   "__builtin_vec_" NAME,  /* NAME */  \
+   RS6000_BTM_P10, /* MASK */  \
+   

Re: [RS6000] rotate and mask constants

2020-10-08 Thread will schmidt via Gcc-patches
On Thu, 2020-10-08 at 09:36 +1030, Alan Modra via Gcc-patches wrote:
> Implement more two insn constants.  rotate_and_mask_constant covers
> 64-bit constants that can be formed by rotating a 16-bit signed
> constant, rotating a 16-bit signed constant masked on left or right
> (rldicl and rldicr), rotating a 16-bit signed constant masked by
> rldic, and unusual "lis; rldicl" and "lis; rldicr" patterns.  All the
> values possible for DImode rs6000_is_valid_and_mask are covered.

lgtm, 

Just a couple cosmetic nits, since I was reading through.. :-)

> 
> Bootstrapped and regression tested powerpc64le-linux.
> 
>   PR 94393


PR target/94393

(unless the hooks currently handle that for us? )


> gcc/

gcc/ChangeLog:

>   * config/rs6000/rs6000.c (rotate_and_mask_constant): New function.
>   (num_insns_constant_multi, rs6000_emit_set_long_const): Use it here.
>   * config/rs6000/rs6000.md (*movdi_internal64+1 splitter): Delete.
> gcc/testsuite/
>   * gcc.target/powerpc/rot_cst.h,
>   * gcc.target/powerpc/rot_cst1.c,
>   * gcc.target/powerpc/rot_cst2.c: New tests.
> 



> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 14ecbad5df4..9809d11f47a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -1129,6 +1129,8 @@ static tree rs6000_handle_altivec_attribute (tree *, 
> tree, tree, int, bool *);
>  static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
>  static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
>  static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
> +static bool rotate_and_mask_constant (unsigned HOST_WIDE_INT, HOST_WIDE_INT 
> *,
> +   int *, unsigned HOST_WIDE_INT *);
>  static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
>  static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, 
> bool);
>  static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
> @@ -5877,7 +5879,7 @@ num_insns_constant_gpr (HOST_WIDE_INT value)
>  }
> 
>  /* Helper for num_insns_constant.  Allow constants formed by the
> -   num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
> +   num_insns_constant_gpr sequences, and li/lis+rldicl/rldicr/rldic/rlwinm,
> and handle modes that require multiple gprs.  */
> 
>  static int
> @@ -5892,8 +5894,8 @@ num_insns_constant_multi (HOST_WIDE_INT value, 
> machine_mode mode)
>if (insns > 2
> /* We won't get more than 2 from num_insns_constant_gpr
>except when TARGET_POWERPC64 and mode is DImode or
> -  wider, so the register mode must be DImode.  */
> -   && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
> +  wider.  */
> +   && rotate_and_mask_constant (low, NULL, NULL, NULL))
>   insns = 2;
>total += insns;
>/* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
> @@ -9524,6 +9526,244 @@ rs6000_emit_set_const (rtx dest, rtx source)
>return true;
>  }
> 
> +/* Rotate DImode word, being careful to handle the case where
> +   HOST_WIDE_INT is larger than DImode.  */
> +
> +static inline unsigned HOST_WIDE_INT
> +rotate_di (unsigned HOST_WIDE_INT x, unsigned int shift)
> +{
> +  unsigned HOST_WIDE_INT mask_hi, mask_lo;
> +
> +  mask_hi = (HOST_WIDE_INT_1U << 63 << 1) - (HOST_WIDE_INT_1U << shift);
> +  mask_lo = (HOST_WIDE_INT_1U << shift) - 1;
> +  x = ((x << shift) & mask_hi) | ((x >> (64 - shift)) & mask_lo);
> +  x = (x ^ (HOST_WIDE_INT_1U << 63)) - (HOST_WIDE_INT_1U << 63);
> +  return x;
> +}
> +
> +/* Can C be formed by rotating a 16-bit positive value left by C16LSB?  */
> +
> +static inline bool
> +is_rotate_positive_constant (unsigned HOST_WIDE_INT c, int c16lsb,
> +  HOST_WIDE_INT *val, int *shift,
> +  unsigned HOST_WIDE_INT *mask)
> +{
> +  if ((c & ~(HOST_WIDE_INT_UC (0x7fff) << c16lsb)) == 0)
> +{
> +  /* eg. c = 1100   ... 
> +  -> val = 0x3000, shift = 49, mask = -1ull.  */
> +  if (val)
> + {
> +   c >>= c16lsb;
> +   /* Make the value and shift canonical in the sense of
> +  selecting the smallest value.  For the example above
> +  -> val = 3, shift = 61.  */
> +   int trail_zeros = ctz_hwi (c);
> +   c >>= trail_zeros;
> +   c16lsb += trail_zeros;
> +   *val = c;
> +   *shift = c16lsb;
> +   *mask = HOST_WIDE_INT_M1U;
> + }
> +  return true;
> +}
> +  return false;
> +}
> +
> +/* Can C be formed by rotating a 16-bit negative value left by C16LSB?  */
> +
> +static inline bool
> +is_rotate_negative_constant (unsigned HOST_WIDE_INT c, int c16lsb,
> +  HOST_WIDE_INT *val, int *shift,
> +  unsigned HOST_WIDE_INT *mask)
> +{
> +  if ((c | (HOST_WIDE_INT_UC (0x7fff) << c16lsb)) == HOST_WIDE_INT_M1U)
> +{
> +  if (val)
> + {
> +   c >>= c16lsb;
> +   

Re: [PATCH 7/8] [RS6000] rs6000_rtx_costs reduce cost for SETs

2020-10-08 Thread will schmidt via Gcc-patches
On Thu, 2020-10-08 at 09:27 +1030, Alan Modra via Gcc-patches wrote:
> The aim of this patch is to make rtx_costs for SETs closer to
> insn_cost for SETs.  One visible effect on powerpc code is increased
> if-conversion.
> 
>   * config/rs6000/rs6000.c (rs6000_rtx_costs): Reduce cost of SET
>   operands.
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 76aedbfae6f..d455aa52427 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -21684,6 +21684,35 @@ rs6000_rtx_costs (rtx x, machine_mode mode,
> int outer_code,
>   }
>return false;
> 
> +case SET:
> +  /* On entry the value in *TOTAL is the number of general
> purpose
> +  regs being set, multiplied by COSTS_N_INSNS (1).  Handle
> +  costing of set operands specially since in most cases we have
> +  an instruction rather than just a piece of RTL and should
> +  return a cost comparable to insn_cost.  That's a little
> +  complicated because in some cases the cost of SET operands is
> +  non-zero, see point 5 above and cost of PLUS for example, and
> +  in others it is zero, for example for (set (reg) (reg)).
> +  But (set (reg) (reg)) has the same insn_cost as
> +  (set (reg) (plus (reg) (reg))).  Hack around this by
> +  subtracting COSTS_N_INSNS (1) from the operand cost in cases
> +  were we add at least COSTS_N_INSNS (1) for some operation.

s/were/where/   :-)

> +  However, don't do so for constants.  Constants might cost
> +  more than zero when they require more than one instruction,
> +  and we do want the cost of extra instructions.  */
> +  {
> + rtx_code src_code = GET_CODE (SET_SRC (x));
> + if (src_code == CONST_INT
> + || src_code == CONST_DOUBLE
> + || src_code == CONST_WIDE_INT)
> +   return false;
> + int set_cost = (rtx_cost (SET_SRC (x), mode, SET, 1, speed)
> + + rtx_cost (SET_DEST (x), mode, SET, 0,
> speed));
> + if (set_cost >= COSTS_N_INSNS (1))
> +   *total += set_cost - COSTS_N_INSNS (1);
> + return true;
> +  }
> +
>  default:
>return false;
>  }

lgtm, 
thanks
-Will



Re: [PATCH 5/5] Conversions between 128-bit integer and floating point values.

2020-10-08 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 11:52 -0700, Carl Love wrote:
> Will, Segher:
> 
> This patch adds support for converting to/from 128-bit integers and
> 128-bit decimal floating point formats using the new P10 instructions
> dcffixqq and dctfixqq.  The new instructions are only used on P10 HW,
> otherwise the conversions continue to use the existing SW routines.
> 
> The changes from the previous version include:
> 
> Fixed up the change log entry issues noted by Will.
> 
> Regression tests reran on Power 9 LE with no regression errors.
> 
> Please let me know if it looks OK to commit to mainline.
> 
>   Carl 
> -
> 
> gcc/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * config/rs6000/rs6000.md (floatti2, floatunsti2,
>   fix_truncti2, fixuns_truncti2): Add
>   define_insn for mode IEEE 128.
>   * libgcc/config/rs6000/fixkfti.c: Renamed to fixkfti-sw.c.
>   Update source function name.  White space fixes.

I'd move the generic 'Update source ... White space...' bits to the
patch description.  In addition to the 'Rename' statement, some form of
'Change calls of __fixkfti to __fixkfti_sw' would be more useful here.

>   * libgcc/config/rs6000/fixunskfti.c: Renamed to fixunskfti-sw.c.
>   Update source function name.  White space fixes.
>   * libgcc/config/rs6000/float128-hw.c (__floattikf_hw,
>   __floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw):
>   New functions.
ok
>   * libgcc/config/rs6000/float128-ifunc.c (SW_OR_HW_ISA3_1):
>   New macro.
>   (__floattikf_resolve, __floatuntikf_resolve, __fixkfti_resolve,
>   __fixunskfti_resolve): Add resolve functions.
>   (__floattikf, __floatuntikf, __fixkfti, __fixunskfti): New
>   functions.
ok
>   * libgcc/config/rs6000/float128-sed (floattitf, __floatuntitf,
>   __fixtfti, __fixunstfti): Add editor commands to change
>   names.
>   * libgcc/config/rs6000/float128-sed-hw (__floattitf,
>   __floatuntitf, __fixtfti, __fixunstfti): Add editor commands
>   to change names.
ok

>   * libgcc/config/rs6000/floattikf.c: Renamed to floattikf-sw.c.
>   * libgcc/config/rs6000/floatuntikf.c: Renamed to floatuntikf-sw.c.
>   * libgcc/config/rs6000/quad-float128.h (__floattikf_sw,
>   __floatuntikf_sw, __fixkfti_sw, __fixunskfti_sw, __floattikf_hw,
>   __floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw, __floattikf,
>   __floatuntikf, __fixkfti, __fixunskfti): New extern declarations.
>   * libgcc/config/rs6000/t-float128 (floattikf, floatuntikf,
>   fixkfti, fixunskfti): Remove file names from fp128_ppc_funcs.
>   (floattikf-sw, floatuntikf-sw, fixkfti-sw, fixunskfti-sw): Add
>   file names to fp128_ppc_funcs.

> 
> gcc/testsuite/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * gcc.target/powerpc/fl128_conversions.c: New file.

fp128_conversions.c



> ---
>  gcc/config/rs6000/rs6000.md   |  36 +++
>  .../gcc.target/powerpc/fp128_conversions.c| 286 ++
>  .../config/rs6000/{fixkfti.c => fixkfti-sw.c} |   4 +-
>  .../rs6000/{fixunskfti.c => fixunskfti-sw.c}  |   7 +-
>  libgcc/config/rs6000/float128-hw.c|  24 ++
>  libgcc/config/rs6000/float128-ifunc.c |  44 ++-
>  libgcc/config/rs6000/float128-sed |   4 +
>  libgcc/config/rs6000/float128-sed-hw  |   4 +
>  .../rs6000/{floattikf.c => floattikf-sw.c}|   4 +-
>  .../{floatuntikf.c => floatuntikf-sw.c}   |   4 +-
>  libgcc/config/rs6000/quad-float128.h  |  17 +-
>  libgcc/config/rs6000/t-float128   |   3 +-
>  12 files changed, 417 insertions(+), 20 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/fp128_conversions.c
>  rename libgcc/config/rs6000/{fixkfti.c => fixkfti-sw.c} (96%)
>  rename libgcc/config/rs6000/{fixunskfti.c => fixunskfti-sw.c} (90%)
>  rename libgcc/config/rs6000/{floattikf.c => floattikf-sw.c} (96%)
>  rename libgcc/config/rs6000/{floatuntikf.c => floatuntikf-sw.c} (96%)
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 694ff70635e..5db5d0b4505 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -6390,6 +6390,42 @@
> xscvsxddp %x0,%x1"
>[(set_attr "type" "fp")])
> 
> +(define_insn "floatti2"
> +  [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
> +   (float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))]
> +  "TARGET_POWER10"
> +{
> +  return  "xscvsqqp %0,%1";
> +}
> +  [(set_attr "type" "fp")])
> +
> +(define_insn "floatunsti2"
> +  [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
> +   (unsigned_float:IEEE128 (match_operand:TI 1 "vsx_register_operand" 
> "v")))]
> +  "TARGET_POWER10"
> +{
> +  return  "xscvuqqp %0,%1";
> +}
> +  [(set_attr "type" "fp")])
> +
> +(define_insn "fix_truncti2"
> +  [(set (match_operand:TI 0 "vsx_register_operand" "=v")
> +   (fix:TI (match_operand:IEEE128 1 

Re: [PATCH 4/5] Test 128-bit shifts for just the int128 type.

2020-10-08 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 11:52 -0700, Carl Love wrote:
> Will, Segher:
> 
> Patch 4 adds the vector 128-bit integer shift instruction support for
> the V1TI type.
> 
> The changes from the previous version include:
> 
> Fixed up the change log entry issues noted by Will.
> 
> Regression tests reran on Power 9 LE with no regression errors.
> 
> Please let me know if it looks OK to commit to mainline.
> 
>   Carl 
> -
> 
> gcc/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * config/rs6000/altivec.md (altivec_vslq, altivec_vsrq):
>   Rename to altivec_vslq_<mode>, altivec_vsrq_<mode>, mode VEC_TI.
>   * config/rs6000/vector.md (VEC_TI): New name for VSX_TI iterator.

What was the old name?   (Maybe just 'New iterator' ?)
Ok, back from below.  This is the new name and location for what was
previously named VSX_TI in vsx.md.
It wouldn't hurt to have a statement in the description to clarify that, e.g.
"This patch renames the VSX_TI iterator to VEC_TI, and updates the
users."


>   (vashlv1ti3): Change to vashl<mode>3, mode VEC_TI.
>   (vlshrv1ti3): Change to vlshr<mode>3, mode VEC_TI.
>   * config/rs6000/vsx.md (VSX_TI): Remove define_mode_iterator.

> 
> gcc/testsuite/ChangeLog
> 
> 2020-10-05  Carl Love  
>   gcc.target/powerpc/int_128bit-runnable.c: Add shift_right, shift_left
>   tests.
> ---
>  gcc/config/rs6000/altivec.md  | 16 -
>  gcc/config/rs6000/vector.md   | 27 ---
>  gcc/config/rs6000/vsx.md  | 33 +--
>  .../gcc.target/powerpc/int_128bit-runnable.c  | 16 +++--
>  4 files changed, 52 insertions(+), 40 deletions(-)
> 

ok

> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 34a4731342a..5db3de3cc9f 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2219,10 +2219,10 @@
>"vsl %0,%1,%2"
>[(set_attr "type" "vecsimple")])
> 
> -(define_insn "altivec_vslq"
> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
> - (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
> -  (match_operand:V1TI 2 "vsx_register_operand" "v")))]
> +(define_insn "altivec_vslq_"
> +  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
> + (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
> +  (match_operand:VEC_TI 2 "vsx_register_operand" "v")))]
>"TARGET_POWER10"
>/* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
>"vslq %0,%1,%2"
> @@ -2236,10 +2236,10 @@
>"vsr %0,%1,%2"
>[(set_attr "type" "vecsimple")])
> 
> -(define_insn "altivec_vsrq"
> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
> - (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
> -(match_operand:V1TI 2 "vsx_register_operand" "v")))]
> +(define_insn "altivec_vsrq_"
> +  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
> + (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
> +(match_operand:VEC_TI 2 "vsx_register_operand" 
> "v")))]
>"TARGET_POWER10"
>/* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
>"vsrq %0,%1,%2"

ok


> diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
> index 0cca4232619..3ea3a91845a 100644
> --- a/gcc/config/rs6000/vector.md
> +++ b/gcc/config/rs6000/vector.md
> @@ -26,6 +26,9 @@
>  ;; Vector int modes
>  (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
> 
> +;; 128-bit int modes
> +(define_mode_iterator VEC_TI [V1TI TI])
> +
>  ;; Vector int modes for parity
>  (define_mode_iterator VEC_IP [V8HI
> V4SI
> @@ -1627,17 +1630,17 @@
>"")
> 
>  ;; No immediate version of this 128-bit instruction
> -(define_expand "vashlv1ti3"
> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
> - (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
> -  (match_operand:V1TI 2 "vsx_register_operand" "v")))]
> +(define_expand "vashl3"
> +  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
> + (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
> +  (match_operand:VEC_TI 2 "vsx_register_operand")))]
>"TARGET_POWER10"
>  {
>/* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
> -  rtx tmp = gen_reg_rtx (V1TImode);
> +  rtx tmp = gen_reg_rtx (mode);
> 
>emit_insn(gen_xxswapd_v1ti (tmp, operands[2]));
> -  emit_insn(gen_altivec_vslq (operands[0], operands[1], tmp));
> +  emit_insn(gen_altivec_vslq_ (operands[0], operands[1], tmp));
>DONE;
>  })
> 
> @@ -1650,17 +1653,17 @@
>"")
> 
>  ;; No immediate version of this 128-bit instruction
> -(define_expand "vlshrv1ti3"
> -  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
> - (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
> -

Re: [PATCH 3/5] Add TI to TD (128-bit DFP) and TD to TI support

2020-10-08 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 11:52 -0700, Carl Love wrote:
> Will, Segher:
> 
> Add support for converting to/from 128-bit integers and 128-bit 
> decimal floating point formats.
> 
> The updates from the previous version of the patch:
> 
> Just a fix for the change log per Will's comments.
> 
> No regression failures were found when run on a P9.
> 
> Please let me know if this is ready for mainline. 
> 
>Carl
> 
> --
> 
> 
> gcc/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * config/rs6000/dfp.md (floattitd2, fixtdti2): New define_insns.
>   * config/rs6000/rs6000-call.c (P10V_BUILTIN_VCMPNET_P, 
> P10V_BUILTIN_VCMPAET_P):
>   New overloaded definitions.
> 
> gcc/testsuite/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * gcc.target/powerpc/int_128bit-runnable.c:  Update test.


Maybe 'Add 128-bit DFP conversion tests' to give it better meaning.


> ---
>  gcc/config/rs6000/dfp.md  | 14 +
>  gcc/config/rs6000/rs6000-call.c   |  4 ++
>  .../gcc.target/powerpc/int_128bit-runnable.c  | 62 +++
>  3 files changed, 80 insertions(+)
> 
> diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
> index 8f822732bac..0e82e315fee 100644
> --- a/gcc/config/rs6000/dfp.md
> +++ b/gcc/config/rs6000/dfp.md
> @@ -222,6 +222,13 @@
>"dcffixq %0,%1"
>[(set_attr "type" "dfp")])
> 
> +(define_insn "floattitd2"
> +  [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
> + (float:TD (match_operand:TI 1 "gpc_reg_operand" "v")))]
> +  "TARGET_POWER10"
> +  "dcffixqq %0,%1"
> +  [(set_attr "type" "dfp")])
> +
>  ;; Convert a decimal64/128 to a decimal64/128 whose value is an integer.
>  ;; This is the first stage of converting it to an integer type.
> 
> @@ -241,6 +248,13 @@
>"TARGET_DFP"
>"dctfix %0,%1"
>[(set_attr "type" "dfp")])
> +
> +(define_insn "fixtdti2"
> +  [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
> + (fix:TI (match_operand:TD 1 "gpc_reg_operand" "d")))]
> +  "TARGET_POWER10"
> +  "dctfixqq %0,%1"
> +  [(set_attr "type" "dfp")])
> 
>  ;; Decimal builtin support


ok

> 
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index 87fff5c1c80..8d00a25d806 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -4967,6 +4967,8 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>  RS6000_BTI_bool_V2DI, 0 },
>{ P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P,
>  RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
> +  { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P,
> +RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 
> },
> 
>{ P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P,
>  RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
> @@ -5074,6 +5076,8 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>  RS6000_BTI_bool_V2DI, 0 },
>{ P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P,
>  RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
> +  { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P,
> +RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 
> },
>{ P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P,
>  RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
>{ P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P,

ok

> diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c 
> b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
> index 85ad544e22b..ec3dcf3dff1 100644
> --- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
> +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
> @@ -38,6 +38,7 @@
>  #if DEBUG
>  #include 
>  #include 
> +#include 
> 
> 
>  void print_i128(__int128_t val)
> @@ -59,6 +60,13 @@ int main ()
>__int128_t arg1, result;
>__uint128_t uarg2;
> 
> +  _Decimal128 arg1_dfp128, result_dfp128, expected_result_dfp128;
> +
> +  struct conv_t {
> +__uint128_t u128;
> +_Decimal128 d128;
> +  } conv, conv2;
> +
>vector signed long long int vec_arg1_di, vec_arg2_di;
>vector unsigned long long int vec_uarg1_di, vec_uarg2_di, vec_uarg3_di;
>vector unsigned long long int vec_uresult_di;
> @@ -2249,6 +2257,60 @@ int main ()
>  abort();
>  #endif
>}
> +  
> +  /* DFP to __int128 and __int128 to DFP conversions */
> +  /* Can't get printing of DFP values to work.  Print the DFP value as an
> + unsigned int so we can see the bit patterns.  */

Drop 'Can't get ...', just 'Print the DFP...' should be sufficient.

> +  conv.u128 = 0x2208ULL;
> +  conv.u128 = (conv.u128 << 64) | 0x4ULL;   //DFP bit pattern for integer 4
> +  expected_result_dfp128 = conv.d128;
> 
> +  arg1 = 4;
> +
> +  conv.d128 = (_Decimal128) arg1;
> +
> +  result_dfp128 = (_Decimal128) arg1;
> +  if (((conv.u128 >>64) != 0x2208ULL) &&
> +  ((conv.u128 & 

Re: [PATCH 2b/5] RS6000 add 128-bit Integer Operations

2020-10-07 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 11:52 -0700, Carl Love wrote:
> Will and Segher:
> 
> This is the rest of the second patch which adds the 128-bit integer
> support for divide, modulo, shift, compare of 128-bit
> integers instructions and builtin support.
> 
> In the last round of changes, the flag for the 128-bit operations was
> removed.  Per Will's comments, the  BU_P10_128BIT_* builtin definitions
> can be removed.  Instead we can just use P10V_BUILTIN. Similarly for
> the BU_P10_P builtin definition.  The commit log was updated to reflect
> the change.  There were a few change log entries for the 128-bit
> operations flag that needed removing.  As well as other fixes noted by
> Will.
> 
> The changes are all name changes not functional changes.  
> 
> No regression failures were found when run on a P9.
> 
> Please let me know if this is ready for mainline.  
> 
>Carl
> 
> 
> 
> gcc/ChangeLog
> 
>   2020-10-05  Carl Love  
>   * config/rs6000/altivec.h (vec_signextq, vec_dive, vec_mod): Add define
>   for new builtins.
>   * config/rs6000/altivec.md (UNSPEC_VMULEUD, UNSPEC_VMULESD,
>   UNSPEC_VMULOUD, UNSPEC_VMULOSD): New unspecs.
>   (altivec_eqv1ti, altivec_gtv1ti, altivec_gtuv1ti, altivec_vmuleud,
>   altivec_vmuloud, altivec_vmulesd, altivec_vmulosd, altivec_vrlq,
>   altivec_vrlqmi, altivec_vrlqmi_inst, altivec_vrlqnm,
>   altivec_vrlqnm_inst, altivec_vslq, altivec_vsrq, altivec_vsraq,
>   altivec_vcmpequt_p, altivec_vcmpgtst_p, altivec_vcmpgtut_p): New
>   define_insn.
>   (vec_widen_umult_even_v2di, vec_widen_smult_even_v2di,
>   vec_widen_umult_odd_v2di, vec_widen_smult_odd_v2di, altivec_vrlqmi,
>   altivec_vrlqnm): New define_expands.
>   * config/rs6000/rs6000-builtin.def (VCMPEQUT_P, VCMPGTST_P,
>   VCMPGTUT_P): Add macro expansions.
>   (VCMPGTUT, VCMPGTST, VCMPEQUT, CMPNET, CMPGE_1TI,
>   CMPGE_U1TI, CMPLE_1TI, CMPLE_U1TI, VNOR_V1TI_UNS, VNOR_V1TI, VCMPNET_P,
>   VCMPAET_P, VSIGNEXTSD2Q, VMULEUD, VMULESD, VMULOUD, VMULOSD, VRLQ,
>   VSLQ, VSRQ, VSRAQ, VRLQNM, DIV_V1TI, UDIV_V1TI, DIVES_V1TI, DIVEU_V1TI,
>   MODS_V1TI, MODU_V1TI, VRLQMI): New macro expansions.
>   (VRLQ, VSLQ, VSRQ, VSRAQ, DIVE, MOD, SIGNEXT): New overload expansions.
>   * config/rs6000/rs6000-call.c (P10_BUILTIN_VCMPEQUT,
>   P10V_BUILTIN_CMPGE_1TI, P10V_BUILTIN_CMPGE_U1TI,
>   P10V_BUILTIN_VCMPGTUT, P10V_BUILTIN_VCMPGTST,
>   P10V_BUILTIN_CMPLE_1TI, P10V_BUILTIN_VCMPLE_U1TI,
>   P10V_BUILTIN_128BIT_DIV_V1TI, P10V_BUILTIN_128BIT_UDIV_V1TI,
>   P10V_BUILTIN_128BIT_VMULESD, P10V_BUILTIN_128BIT_VMULEUD,
>   P10V_BUILTIN_128BIT_VMULOSD, P10V_BUILTIN_128BIT_VMULOUD,

Just sniff-checked a few.  Don't see the P10V_BUILTIN_128BIT_* entries
below.

>   P10V_BUILTIN_VNOR_V1TI, P10V_BUILTIN_VNOR_V1TI_UNS,
>   P10V_BUILTIN_128BIT_VRLQ, P10V_BUILTIN_128BIT_VRLQMI,
>   P10V_BUILTIN_128BIT_VRLQNM, P10V_BUILTIN_128BIT_VSLQ,
>   P10V_BUILTIN_128BIT_VSRQ, P10V_BUILTIN_128BIT_VSRAQ,
>   P10V_BUILTIN_VCMPGTUT_P, P10V_BUILTIN_VCMPGTST_P,
>   P10V_BUILTIN_VCMPEQUT_P, P10V_BUILTIN_VCMPGTUT_P,
>   P10V_BUILTIN_VCMPGTST_P, P10V_BUILTIN_CMPNET,
>   P10V_BUILTIN_VCMPNET_P, P10V_BUILTIN_VCMPAET_P,
>   P10V_BUILTIN_128BIT_VSIGNEXTSD2Q, P10V_BUILTIN_128BIT_DIVES_V1TI,
>   P10V_BUILTIN_128BIT_MODS_V1TI, P10V_BUILTIN_128BIT_MODU_V1TI):
>   New overloaded definitions.
>   (rs6000_gimple_fold_builtin) [P10V_BUILTIN_VCMPEQUT,
>   P10_BUILTIN_CMPNET, P10_BUILTIN_CMPGE_1TI,
>   P10_BUILTIN_CMPGE_U1TI, P10_BUILTIN_VCMPGTUT,
>   P10_BUILTIN_VCMPGTST, P10_BUILTIN_CMPLE_1TI,
>   P10_BUILTIN_CMPLE_U1TI]: New case statements.
>   (rs6000_init_builtins) [bool_V1TI_type_node, int_ftype_int_v1ti_v1ti]:
>   New assignments.
>   (altivec_init_builtins): New E_V1TImode case statement.
>   (builtin_function_type)[P10_BUILTIN_128BIT_VMULEUD,
>   P10_BUILTIN_128BIT_VMULOUD, P10_BUILTIN_128BIT_DIVEU_V1TI,
>   P10_BUILTIN_128BIT_MODU_V1TI, P10_BUILTIN_CMPGE_U1TI,
>   P10_BUILTIN_VCMPGTUT, P10_BUILTIN_VCMPEQUT]: New case statements.

I don't see these keywords below.  Possibly P10V_*



>   * config/rs6000/rs6000.c (rs6000_handle_altivec_attribute)[E_TImode,
>   E_V1TImode]: New case statements.
>   * config/rs6000/rs6000.h (RS6000_BTM_TI_VECTOR_OPS): New defines.
>   (rs6000_builtin_type_index): New enum value RS6000_BTI_bool_V1TI.
>   * config/rs6000/vector.md (vector_gtv1ti,vector_nltv1ti,
>   vector_gtuv1ti, vector_nltuv1ti, vector_ngtv1ti, vector_ngtuv1ti,
>   vector_eq_v1ti_p, vector_ne_v1ti_p, vector_ae_v1ti_p,
>   vector_gt_v1ti_p, vector_gtu_v1ti_p, vrotlv1ti3, vashlv1ti3,
>   vlshrv1ti3, vashrv1ti3): New define_expands.
>   * config/rs6000/vsx.md (UNSPEC_VSX_DIVSQ, UNSPEC_VSX_DIVUQ,
>   UNSPEC_VSX_DIVESQ, UNSPEC_VSX_DIVEUQ, UNSPEC_VSX_MODSQ,
>  

Re: [PATCH 2a/5] rs6000, vec_rlnm builtin fix arguments

2020-10-07 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 11:52 -0700, Carl Love wrote:
> Will, Segher:
> 
> 
> 
> The following changes were made from the previous version:
> 
> Per Will's comments, I split the bug fix from patch 2 into a separate
> patch.  This patch is the bug fix for the vec_rlnm builtin.

I recommend trying to keep a clean paragraph appropriate for the commit
log, separate from the history of the patch.  For example:

"This patch fixes an error in how the vec_rlnm() builtin parameters
were handled."




> 
> Regression tests reran on Power 9 LE with no regression errors.
> 
> Please let me know if it looks OK to commit to mainline.
> 
>   Carl 
> 
> --
> 
> 
> gcc/ChangeLog
> 
> 2020-10-05  Carl Love  
> 
>   * config/rs6000/altivec.h (vec_rlnm): Fix bug in argument generation.


Is there any testcase impact?  (If this doesn't fix an existing test,
it may be worth adding one.)

LGTM,
Thanks
-Will

> ---
>  gcc/config/rs6000/altivec.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 8a2dcda0144..f7720d136c9 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -183,7 +183,7 @@
>  #define vec_recipdiv __builtin_vec_recipdiv
>  #define vec_rlmi __builtin_vec_rlmi
>  #define vec_vrlnm __builtin_vec_rlnm
> -#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b)))
> +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c)))
>  #define vec_rsqrt __builtin_vec_rsqrt
>  #define vec_rsqrte __builtin_vec_rsqrte
>  #define vec_signed __builtin_vec_vsigned



Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations

2020-10-07 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 11:51 -0700, Carl Love wrote:
> Will, Segher:
> 
> Patch 1, adds the 128-bit sign extension instruction support and
> corresponding builtin support.


> 
> I updated the change log per the comments from Will.
> 
> Patch has been retested on Power 9 LE.
> 
> Let me know if it is ready to commit to mainline.
> 
>  Carl 
> 
> ---
> 
> 
> gcc/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * config/rs6000/altivec.h (vec_signextll, vec_signexti): Add define
>   for new builtins.
>   * config/rs6000/rs6000-builtin.def (VSIGNEXTI, VSIGNEXTLL):  Add
>   overloaded builtin definitions.
>   (VSIGNEXTSB2W, VSIGNEXTSH2W, VSIGNEXTSB2D, VSIGNEXTSH2D,VSIGNEXTSW2D):
>   Add builtin expansions.
>   * config/rs6000/rs6000-call.c (P9V_BUILTIN_VEC_VSIGNEXTI,
>   P9V_BUILTIN_VEC_VSIGNEXTLL): Add overloaded argument definitions.
>   * config/rs6000/vsx.md: Make define_insn vsx_sign_extend_si_v2di
>   visible.
>   * doc/extend.texi:  Add documentation for the vec_signexti and
>   vec_signextll builtins.
> 
> gcc/testsuite/ChangeLog
> 
> 2020-10-05  Carl Love  
>   * gcc.target/powerpc/p9-sign_extend-runnable.c:  New test case.
> ---
>  gcc/config/rs6000/altivec.h   |   3 +
>  gcc/config/rs6000/rs6000-builtin.def  |   9 ++
>  gcc/config/rs6000/rs6000-call.c   |  13 ++
>  gcc/config/rs6000/vsx.md  |   2 +-
>  gcc/doc/extend.texi   |  15 ++
>  .../powerpc/p9-sign_extend-runnable.c | 128 ++
>  6 files changed, 169 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c
> 
> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index f7720d136c9..cfa5eda4cd5 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -494,6 +494,9 @@
> 
>  #define vec_xlx __builtin_vec_vextulx
>  #define vec_xrx __builtin_vec_vexturx
> +#define vec_signexti  __builtin_vec_vsignexti
> +#define vec_signextll __builtin_vec_vsignextll
> +
>  #endif

Can probably drop that blank line.


> 
>  /* Predicates.
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index e91a48ddf5f..4c2e9460949 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2715,6 +2715,8 @@ BU_P9V_OVERLOAD_1 (VPRTYBD, "vprtybd")
>  BU_P9V_OVERLOAD_1 (VPRTYBQ,  "vprtybq")
>  BU_P9V_OVERLOAD_1 (VPRTYBW,  "vprtybw")
>  BU_P9V_OVERLOAD_1 (VPARITY_LSBB, "vparity_lsbb")
> +BU_P9V_OVERLOAD_1 (VSIGNEXTI,"vsignexti")
> +BU_P9V_OVERLOAD_1 (VSIGNEXTLL,   "vsignextll")
> 
>  /* 2 argument functions added in ISA 3.0 (power9).  */
>  BU_P9_2 (CMPRB,  "byte_in_range",CONST,  cmprb)
> @@ -2726,6 +2728,13 @@ BU_P9_OVERLOAD_2 (CMPRB,   "byte_in_range")
>  BU_P9_OVERLOAD_2 (CMPRB2,"byte_in_either_range")
>  BU_P9_OVERLOAD_2 (CMPEQB,"byte_in_set")
>  
> +/* Sign extend builtins that work on ISA 3.0, but not defined until ISA 3.1. 
>  */

I have mixed feelings about straddling ISA 3.0 and 3.1, but I am not
sure how to improve it properly.  (I defer.)
The rest LGTM, 
Thanks
-Will




> +BU_P9V_AV_1 (VSIGNEXTSB2W,   "vsignextsb2w", CONST,  
> vsx_sign_extend_qi_v4si)
> +BU_P9V_AV_1 (VSIGNEXTSH2W,   "vsignextsh2w", CONST,  
> vsx_sign_extend_hi_v4si)
> +BU_P9V_AV_1 (VSIGNEXTSB2D,   "vsignextsb2d", CONST,  
> vsx_sign_extend_qi_v2di)
> +BU_P9V_AV_1 (VSIGNEXTSH2D,   "vsignextsh2d", CONST,  
> vsx_sign_extend_hi_v2di)
> +BU_P9V_AV_1 (VSIGNEXTSW2D,   "vsignextsw2d", CONST,  
> vsx_sign_extend_si_v2di)
> +
>  /* Builtins for scalar instructions added in ISA 3.1 (power10).  */
>  BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
>  BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index a8b520834c7..9e514a01012 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -5527,6 +5527,19 @@ const struct altivec_builtin_types 
> altivec_overloaded_builtins[] = {
>  RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
>  RS6000_BTI_INTSI, RS6000_BTI_INTSI },
> 
> +  /* Sign extend builtins that work on ISA 3.0, not added until ISA 3.1 
> */
> +  { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSB2W,
> +RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0, 0 },
> +  { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSH2W,
> +RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
> +
> +  { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSB2D,
> +RS6000_BTI_V2DI, RS6000_BTI_V16QI, 0, 0 },
> +  { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSH2D,
> +RS6000_BTI_V2DI, RS6000_BTI_V8HI, 0, 0 },
> +  { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSW2D,
> +RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
> +
>   

[PATCH, rs6000] rename BU_P10_MISC_2 define to BU_P10_POWERPC64_MISC_2

2020-10-07 Thread will schmidt via Gcc-patches


Hi,
  Rename our BU_P10_MISC_2 built-in define macro to be
BU_P10_POWERPC64_MISC_2.   This more accurately reflects
that the macro includes the RS6000_BTM_POWERPC64 entry
that is not present in the other BU_P10_MISC macros, 
and matches the style we used for the P7 equivalent.

Should be entirely cosmetic, no codegen changes.
A regtest is underway just in case.
OK for trunk?

Thanks,
-Will

gcc/ChangeLog:
* config/rs6000/rs6000-builtin.def (BU_P10_MISC_2): Rename
to BU_P10_POWERPC64_MISC_2.
(CFUGED,CNTLZDM,CNTTZDM,PDEPD,PEXTD): Call renamed macro.

diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index e91a48ddf5fe..3eb55f0ae434 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1109,11 +1109,11 @@
RS6000_BTM_P10, /* MASK */  \
(RS6000_BTC_ ## ATTR/* ATTR */  \
 | RS6000_BTC_UNARY),   \
CODE_FOR_ ## ICODE) /* ICODE */
 
-#define BU_P10_MISC_2(ENUM, NAME, ATTR, ICODE) \
+#define BU_P10_POWERPC64_MISC_2(ENUM, NAME, ATTR, ICODE)   \
   RS6000_BUILTIN_2 (P10_BUILTIN_ ## ENUM,  /* ENUM */  \
"__builtin_" NAME,  /* NAME */  \
RS6000_BTM_P10  \
| RS6000_BTM_POWERPC64, /* MASK */  \
(RS6000_BTC_ ## ATTR/* ATTR */  \
@@ -2725,15 +2725,15 @@ BU_P9_64BIT_2 (CMPEQB,  "byte_in_set",  CONST,  cmpeqb)
 BU_P9_OVERLOAD_2 (CMPRB,   "byte_in_range")
 BU_P9_OVERLOAD_2 (CMPRB2,  "byte_in_either_range")
 BU_P9_OVERLOAD_2 (CMPEQB,  "byte_in_set")
 
 /* Builtins for scalar instructions added in ISA 3.1 (power10).  */
-BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
-BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
-BU_P10_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
-BU_P10_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
-BU_P10_MISC_2 (PEXTD, "pextd", CONST, pextd)
+BU_P10_POWERPC64_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
+BU_P10_POWERPC64_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
+BU_P10_POWERPC64_MISC_2 (CNTTZDM, "cnttzdm", CONST, cnttzdm)
+BU_P10_POWERPC64_MISC_2 (PDEPD, "pdepd", CONST, pdepd)
+BU_P10_POWERPC64_MISC_2 (PEXTD, "pextd", CONST, pextd)
 
 /* Builtins for vector instructions added in ISA 3.1 (power10).  */
 BU_P10V_AV_2 (VCLRLB, "vclrlb", CONST, vclrlb)
 BU_P10V_AV_2 (VCLRRB, "vclrrb", CONST, vclrrb)
 BU_P10V_AV_2 (VCFUGED, "vcfuged", CONST, vcfuged)



Re: [PATCH] rs6000: Fix extraneous characters in the documentation

2020-10-06 Thread will schmidt via Gcc-patches
On Mon, 2020-10-05 at 17:23 -0300, Tulio Magno Quites Machado Filho via 
Gcc-patches wrote:
> Ping?
+cc Segher  :-)

> 
> Tulio Magno Quites Machado Filho via Gcc-patches  
> writes:
> 
> > Replace them with a whitespace in order to avoid artifacts in the HTML
> > document.
> > 
> > 2020-08-19  Tulio Magno Quites Machado Filho  
> > 
> > gcc/
> > * doc/extend.texi (PowerPC Built-in Functions): Replace
> > extraneous characters with whitespace.
> > ---
> >  gcc/doc/extend.texi | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> > 
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index bcc251481ca..0c380322280 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -21538,10 +21538,10 @@ void amo_stdat_smin (int64_t *, int64_t);
> >  ISA 3.1 of the PowerPC added new Matrix-Multiply Assist (MMA) instructions.
> >  GCC provides support for these instructions through the following built-in
> >  functions which are enabled with the @code{-mmma} option.  The vec_t type
> > -below is defined to be a normal vector unsigned char type.  The uint2, 
> > uint4
> > +below is defined to be a normal vector unsigned char type.  The uint2, 
> > uint4

That looks like a non-breaking space (UTF-8 bytes c2 a0), so
2e c2 a0 20 becomes 2e 20 20.


> >  and uint8 parameters are 2-bit, 4-bit and 8-bit unsigned integer constants
> > -respectively.  The compiler will verify that they are constants and that
> > -their values are within range. 
> > +respectively.  The compiler will verify that they are constants and that
> > +their values are within range.

2e c2 a0 20 becomes 2e 20 20

And drops a trailing whitespace.

Those seem reasonable. 
lgtm

Thanks
-Will

> >  
> >  The built-in functions supported are:
> >  
> > -- 
> > 2.25.4
> > 
> 
> 



<    2   3   4   5   6   7   8   9   10   >