This patch adds two new GCN-specific options: -mxnack and -msram-ecc={on,off,any}.

The primary purpose is to ensure that we have an explicit default setting for these features and that this is passed to the assembler. This will ensure that if the LLVM defaults change again, GCC won't get caught out and stop working because of attribute mismatches.

The new options will provide a means to adjust these features in future, but this patch does not actually add any new support for either XNACK or SRAM-ECC.

The XNACK feature has two settings, "on" (-mxnack) and "off" (-mno-xnack). The default is "off", and trying to turn it on will give a "sorry, unimplemented" message. To implement this will require changes to the load/store instruction early-clobber rules (actually, clobbering across multiple contiguous load/store instructions is a problem too), and a new xnack-enabled multilib for each supported ISA.

The SRAM-ECC feature has three settings, "on", "off" and "any" (in which the generated code must work with the device configured to either mode). The current implementation is actually "any" already, but as that attribute setting is not available in the HSACOv3 binary standard we target right now, we just set it to "on" or "off" according to which makes sense for the configured ISA. We'll have to revisit this when we implement HSACOv4 compatibility.

Andrew
amdgcn: Add -mxnack and -msram-ecc [PR 100208]

gcc/ChangeLog:

        PR target/100208
        * config/gcn/gcn-hsa.h (DRIVER_SELF_SPECS): New.
        (ASM_SPEC): Set -mattr for xnack and sram-ecc.
        * config/gcn/gcn-opts.h (enum sram_ecc_type): New.
        * config/gcn/gcn-valu.md: Add a warning comment.
        * config/gcn/gcn.c (gcn_option_override): Add "sorry" for -mxnack.
        (output_file_start): Add xnack and sram-ecc state to ".amdgcn_target".
        * config/gcn/gcn.md: Add a warning comment.
        * config/gcn/gcn.opt: Add -mxnack and -msram-ecc.
        * config/gcn/mkoffload.c (EF_AMDGPU_MACH_AMDGCN_GFX908): Remove
        SRAM-ECC flag.
        (EF_AMDGPU_XNACK): New.
        (EF_AMDGPU_SRAM_ECC): New.
        (elf_flags): New.
        (copy_early_debug_info): Use elf_flags.
        (main): Handle -mxnack and -msram-ecc options.
        * doc/invoke.texi: Document -mxnack and -msram-ecc.

gcc/testsuite/ChangeLog:

        PR target/100208
        * gcc.target/gcn/sram-ecc-1.c: New test.
        * gcc.target/gcn/sram-ecc-2.c: New test.
        * gcc.target/gcn/sram-ecc-3.c: New test.
        * gcc.target/gcn/sram-ecc-4.c: New test.
        * gcc.target/gcn/sram-ecc-5.c: New test.
        * gcc.target/gcn/sram-ecc-6.c: New test.
        * gcc.target/gcn/sram-ecc-7.c: New test.
        * gcc.target/gcn/sram-ecc-8.c: New test.

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 61cdb312c2e..724e9a381ba 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -75,9 +75,15 @@ extern unsigned int gcn_local_sym_hash (const char *name);
    supported for gcn.  */
 #define GOMP_SELF_SPECS ""
 
+#define DRIVER_SELF_SPECS \
+  "%{march=fiji|march=gfx900|march=gfx906:%{!msram-ecc=*:-msram-ecc=off}}"
+
 /* Use LLVM assembler and linker options.  */
 #define ASM_SPEC  "-triple=amdgcn--amdhsa "  \
                  "%:last_arg(%{march=*:-mcpu=%*}) " \
+                 "-mattr=%{mxnack:+xnack;:-xnack} " \
+                 /* FIXME: support "any" when we move to HSACOv4.  */ \
+                 "-mattr=%{!msram-ecc=off:+sram-ecc;:-sram-ecc} " \
                  "-filetype=obj"
 #define LINK_SPEC "--pie --export-dynamic"
 #define LIB_SPEC  "-lc"
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index ed67d015ff8..b25516060e1 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -34,4 +34,11 @@ extern int gcn_isa;
 #define TARGET_GCN5 (gcn_isa == 5)
 #define TARGET_GCN5_PLUS (gcn_isa >= 5)
 
+enum sram_ecc_type
+{
+  SRAM_ECC_OFF,
+  SRAM_ECC_ON,
+  SRAM_ECC_ANY
+};
+
 #endif
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index beefcf754d7..84ff67508b9 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -703,6 +703,8 @@ (define_expand "vec_init<mode><scalar_mode>"
 ;; - The address space and glc (volatile) fields are there to replace the
 ;;   fields normally found in a MEM.
 ;; - Multiple forms of address expression are supported, below.
+;;
+;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
 
 (define_expand "gather_load<mode><vnsi>"
   [(match_operand:V_ALL 0 "register_operand")
diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 6d02a4a02dd..385b90c4b00 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -144,6 +144,10 @@ gcn_option_override (void)
        /* 1MB total.  */
        stack_size_opt = 1048576;
     }
+
+  /* The xnack option is a placeholder, for now.  */
+  if (flag_xnack)
+    sorry ("XNACK support");
 }
 
 /* }}}  */
@@ -5182,11 +5186,16 @@ output_file_start (void)
     case PROCESSOR_FIJI: cpu = "gfx803"; break;
     case PROCESSOR_VEGA10: cpu = "gfx900"; break;
     case PROCESSOR_VEGA20: cpu = "gfx906"; break;
-    case PROCESSOR_GFX908: cpu = "gfx908+sram-ecc"; break;
+    case PROCESSOR_GFX908: cpu = "gfx908"; break;
     default: gcc_unreachable ();
     }
 
-  fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s\"\n", cpu);
+  const char *xnack = (flag_xnack ? "+xnack" : "");
+  /* FIXME: support "any" when we move to HSACOv4.  */
+  const char *sram_ecc = (flag_sram_ecc ? "+sram-ecc" : "");
+
+  fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
+         cpu, xnack, sram_ecc);
 }
 
 /* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index ae7249aac0e..8ffa43c030e 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -569,6 +569,7 @@ (define_insn "*mov<mode>_insn"
    (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
 
 ; 8/16bit move pattern
+; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
 
 (define_insn "*mov<mode>_insn"
   [(set (match_operand:QIHI 0 "nonimmediate_operand"
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 767d45826c2..b2b10b0794c 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -76,3 +76,24 @@ Target RejectNegative Joined UInteger Var(stack_size_opt) Init(-1)
 Wopenacc-dims
 Target Var(warn_openacc_dims) Warning
 Warn about invalid OpenACC dimensions.
+
+mxnack
+Target Var(flag_xnack) Init(0)
+Compile for devices requiring XNACK enabled. Default off.
+
+Enum
+Name(sram_ecc_type) Type(enum sram_ecc_type)
+SRAM-ECC modes:
+
+EnumValue
+Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
+
+EnumValue
+Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
+
+EnumValue
+Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
+
+msram-ecc=
+Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
+Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".
diff --git a/gcc/config/gcn/mkoffload.c b/gcc/config/gcn/mkoffload.c
index 1469a688929..804cc26471b 100644
--- a/gcc/config/gcn/mkoffload.c
+++ b/gcc/config/gcn/mkoffload.c
@@ -52,7 +52,10 @@
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX906
 #define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
 #undef  EF_AMDGPU_MACH_AMDGCN_GFX908
-#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x230  // Assume SRAM-ECC enabled.
+#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
+
+#define EF_AMDGPU_XNACK    0x100
+#define EF_AMDGPU_SRAM_ECC 0x200
 
 #ifndef R_AMDGPU_NONE
 #define R_AMDGPU_NONE          0
@@ -77,6 +80,7 @@ static struct obstack files_to_cleanup;
 
 enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
 uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX803;  // Default GPU architecture.
+uint32_t elf_flags = 0;
 
 /* Delete tempfiles.  */
 
@@ -298,7 +302,7 @@ copy_early_debug_info (const char *infile, const char *outfile)
   ehdr.e_ident[8] = ELFABIVERSION_AMDGPU_HSA;
   ehdr.e_type = ET_REL;
   ehdr.e_machine = EM_AMDGPU;
-  ehdr.e_flags = elf_arch;
+  ehdr.e_flags = elf_arch | elf_flags;
 
   /* Load the section headers so we can walk them later.  */
   Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
@@ -823,6 +827,7 @@ main (int argc, char **argv)
   bool fopenacc = false;
   bool fPIC = false;
   bool fpic = false;
+  bool sram_seen = false;
   for (int i = 1; i < argc; i++)
     {
 #define STR "-foffload-abi="
@@ -845,6 +850,26 @@ main (int argc, char **argv)
        fPIC = true;
       else if (strcmp (argv[i], "-fpic") == 0)
        fpic = true;
+      else if (strcmp (argv[i], "-mxnack") == 0)
+       elf_flags |= EF_AMDGPU_XNACK;
+      else if (strcmp (argv[i], "-mno-xnack") == 0)
+       elf_flags &= ~EF_AMDGPU_XNACK;
+      else if (strcmp (argv[i], "-msram-ecc=on") == 0)
+       {
+         elf_flags |= EF_AMDGPU_SRAM_ECC;
+         sram_seen = true;
+       }
+      else if (strcmp (argv[i], "-msram-ecc=any") == 0)
+       {
+         /* FIXME: change this when we move to HSACOv4.  */
+         elf_flags |= EF_AMDGPU_SRAM_ECC;
+         sram_seen = true;
+       }
+      else if (strcmp (argv[i], "-msram-ecc=off") == 0)
+       {
+         elf_flags &= ~EF_AMDGPU_SRAM_ECC;
+         sram_seen = true;
+       }
       else if (strcmp (argv[i], "-save-temps") == 0)
        save_temps = true;
       else if (strcmp (argv[i], "-v") == 0)
@@ -865,6 +890,21 @@ main (int argc, char **argv)
   if (!(fopenacc ^ fopenmp))
     fatal_error (input_location, "either -fopenacc or -fopenmp must be set");
 
+  /* The SRAM-ECC feature defaults to "any" on GPUs where the feature is
+     available.  */
+  if (!sram_seen)
+    switch (elf_arch)
+      {
+      case EF_AMDGPU_MACH_AMDGCN_GFX803:
+      case EF_AMDGPU_MACH_AMDGCN_GFX900:
+      case EF_AMDGPU_MACH_AMDGCN_GFX906:
+       break;
+      default:
+       /* FIXME: change this when we move to HSACOv4.  */
+       elf_flags |= EF_AMDGPU_SRAM_ECC;
+       break;
+      }
+
   const char *abi;
   switch (offload_abi)
     {
@@ -892,6 +932,12 @@ main (int argc, char **argv)
   obstack_ptr_grow (&cc_argv_obstack, "-xlto");
   if (fopenmp)
     obstack_ptr_grow (&cc_argv_obstack, "-mgomp");
+  obstack_ptr_grow (&cc_argv_obstack,
+                   (elf_flags & EF_AMDGPU_XNACK
+                    ? "-mxnack" : "-mno-xnack"));
+  obstack_ptr_grow (&cc_argv_obstack,
+                   (elf_flags & EF_AMDGPU_SRAM_ECC
+                    ? "-msram-ecc=on" : "-msram-ecc=off"));
 
   for (int ix = 1; ix != argc; ix++)
     {
@@ -993,6 +1039,14 @@ main (int argc, char **argv)
        }
       obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
       obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
+      obstack_ptr_grow (&ld_argv_obstack,
+                       (elf_flags & EF_AMDGPU_XNACK
+                        ? "-mxnack" : "-mno-xnack"));
+      obstack_ptr_grow (&ld_argv_obstack,
+                       (elf_flags & EF_AMDGPU_SRAM_ECC
+                        ? "-msram-ecc=on" : "-msram-ecc=off"));
+      if (verbose)
+       obstack_ptr_grow (&ld_argv_obstack, "-v");
 
       for (int i = 1; i < argc; i++)
        if (startswith (argv[i], "-l")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b16176ea560..32697e6117c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -18847,6 +18847,15 @@ Compile for GCN5 Vega 20 devices (gfx906).
 
 @end table
 
+@item -msram-ecc=on
+@itemx -msram-ecc=off
+@itemx -msram-ecc=any
+@opindex msram-ecc
+Compile binaries suitable for devices with the SRAM-ECC feature enabled,
+disabled, or either mode.  This feature can be enabled per-process on some
+devices.  The compiled code must match the device mode.  The default is
+@samp{any}, for devices that support it.
+
 @item -mstack-size=@var{bytes}
 @opindex mstack-size
 Specify how many @var{bytes} of stack space will be requested for each GPU
@@ -18855,6 +18864,14 @@ available.  The size of the stack allocation may also have an impact on
 run-time performance.  The default is 32KB when using OpenACC or OpenMP, and
 1MB otherwise.
 
+@item -mxnack
+@opindex mxnack
+Compile binaries suitable for devices with the XNACK feature enabled.  Some
+devices always require XNACK and some allow the user to configure XNACK.  The
+compiled code must match the device mode.  The default is @samp{-mno-xnack}.
+At present this option is a placeholder for support that is not yet
+implemented.
+
 @end table
 
 @node ARC Options
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c
new file mode 100644
index 00000000000..d46c3027a78
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-1.c
@@ -0,0 +1,17 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets without sram-ecc enabled (in which sub-dword loads do not
+   zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msram-ecc=off" } */
+
+extern unsigned char c;
+
+unsigned int
+f ()
+{
+  return c;
+}
+
+/* { dg-final { scan-assembler "lshl.* 24" } } */
+/* { dg-final { scan-assembler "lshr.* 24" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c
new file mode 100644
index 00000000000..351d43ca42c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-2.c
@@ -0,0 +1,17 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets without sram-ecc enabled (in which sub-dword loads do not
+   zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msram-ecc=off" } */
+
+extern unsigned short s;
+
+unsigned short
+f ()
+{
+  return s;
+}
+
+/* { dg-final { scan-assembler "lshl.* 16" } } */
+/* { dg-final { scan-assembler "lshr.* 16" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
new file mode 100644
index 00000000000..692d4578b66
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-3.c
@@ -0,0 +1,21 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets without sram-ecc enabled (in which sub-dword loads do not
+   zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */
+
+typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
+typedef unsigned char v64qi __attribute__ ((vector_size (64*1)));
+
+extern v64si a;
+extern v64qi b;
+
+void
+f ()
+{
+  for (int n = 0; n < 64; n++)
+    a[n] = b[n];
+}
+
+/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
new file mode 100644
index 00000000000..61b8d552759
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-4.c
@@ -0,0 +1,21 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets without sram-ecc enabled (in which sub-dword loads do not
+   zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=off" } */
+
+typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
+typedef unsigned short v64hi __attribute__ ((vector_size (64*2)));
+
+extern v64si a;
+extern v64hi b;
+
+void
+f ()
+{
+  for (int n = 0; n < 64; n++)
+    a[n] = b[n];
+}
+
+/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c
new file mode 100644
index 00000000000..4f0543b5f89
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-5.c
@@ -0,0 +1,17 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets that may not have sram-ecc enabled (in which sub-dword loads do
+   not zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msram-ecc=any" } */
+
+extern unsigned char c;
+
+unsigned int
+f ()
+{
+  return c;
+}
+
+/* { dg-final { scan-assembler "lshl.* 24" } } */
+/* { dg-final { scan-assembler "lshr.* 24" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c
new file mode 100644
index 00000000000..9dfceaf90af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-6.c
@@ -0,0 +1,17 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets that may not have sram-ecc enabled (in which sub-dword loads do
+   not zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -msram-ecc=any" } */
+
+extern unsigned short s;
+
+unsigned short
+f ()
+{
+  return s;
+}
+
+/* { dg-final { scan-assembler "lshl.* 16" } } */
+/* { dg-final { scan-assembler "lshr.* 16" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
new file mode 100644
index 00000000000..9d0ce6f6b5a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-7.c
@@ -0,0 +1,21 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets that may not have sram-ecc enabled (in which sub-dword loads do
+   not zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */
+
+typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
+typedef unsigned char v64qi __attribute__ ((vector_size (64*1)));
+
+extern v64si a;
+extern v64qi b;
+
+void
+f ()
+{
+  for (int n = 0; n < 64; n++)
+    a[n] = b[n];
+}
+
+/* { dg-final { scan-assembler "zero_extendv64qiv64si2" } } */
diff --git a/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
new file mode 100644
index 00000000000..76e02882798
--- /dev/null
+++ b/gcc/testsuite/gcc.target/gcn/sram-ecc-8.c
@@ -0,0 +1,21 @@
+/* Ensure that explicit zero-extend instructions are present when compiling
+   for targets that may not have sram-ecc enabled (in which sub-dword loads do
+   not zero the high bits of the target register).  */
+
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -dp -msram-ecc=any" } */
+
+typedef unsigned int v64si __attribute__ ((vector_size (64*4)));
+typedef unsigned short v64hi __attribute__ ((vector_size (64*2)));
+
+extern v64si a;
+extern v64hi b;
+
+void
+f ()
+{
+  for (int n = 0; n < 64; n++)
+    a[n] = b[n];
+}
+
+/* { dg-final { scan-assembler "zero_extendv64hiv64si2" } } */

Reply via email to