I've committed the attached patch, which adjusts the GCN kernel metadata so that it is correct for GFX9 devices.

The existing implementation was correct for GFX8 and appears to work on GFX9, but it was not technically correct for GFX9.

--
Andrew Stubbs
CodeSourcery / Mentor Graphics
Use GFX9 granulated sgprs count correctly.

2019-11-22  Andrew Stubbs  <a...@codesourcery.com>

	gcc/
	* config/gcn/gcn.c (gcn_hsa_declare_function_name): Calculate
	granulated_sgprs according to architecture.

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 4401896d441..b34e8e7f5e2 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -4922,6 +4922,14 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 	sgpr = MAX_NORMAL_SGPR_COUNT - extra_regs;
     }
 
+  /* GFX8 allocates SGPRs in blocks of 8.
+     GFX9 uses blocks of 16.  */
+  int granulated_sgprs;
+  if (TARGET_GCN3)
+    granulated_sgprs = (sgpr + extra_regs + 7) / 8 - 1;
+  else if (TARGET_GCN5)
+    granulated_sgprs = 2 * ((sgpr + extra_regs + 15) / 16 - 1);
+
   fputs ("\t.align\t256\n", file);
   fputs ("\t.type\t", file);
   assemble_name (file, name);
@@ -4960,7 +4968,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 	   "\t\tcompute_pgm_rsrc2_excp_en = 0\n",
 	   (vgpr - 1) / 4,
 	   /* Must match wavefront_sgpr_count */
-	   (sgpr + extra_regs + 7) / 8 - 1,
+	   granulated_sgprs,
 	   /* The total number of SGPR user data registers requested.  This
 	      number must match the number of user data registers enabled.  */
 	   cfun->machine->args.nsgprs);

Reply via email to