================
@@ -537,59 +537,46 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const 
Function &F,
                                                    raw_ostream &O) const {
   // If the NVVM IR has some of reqntid* specified, then output
   // the reqntid directive, and set the unspecified ones to 1.
-  // If none of reqntid* is specified, don't output reqntid directive.
-  unsigned reqntidx, reqntidy, reqntidz;
-  bool specified = false;
-  if (!getReqNTIDx(F, reqntidx))
-    reqntidx = 1;
-  else
-    specified = true;
-  if (!getReqNTIDy(F, reqntidy))
-    reqntidy = 1;
-  else
-    specified = true;
-  if (!getReqNTIDz(F, reqntidz))
-    reqntidz = 1;
-  else
-    specified = true;
-
-  if (specified)
-    O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
+  // If none of Reqntid* is specified, don't output reqntid directive.
+  unsigned Reqntidx, Reqntidy, Reqntidz;
+  Reqntidx = Reqntidy = Reqntidz = 1;
+  bool ReqSpecified = false;
+  ReqSpecified |= getReqNTIDx(F, Reqntidx);
+  ReqSpecified |= getReqNTIDy(F, Reqntidy);
+  ReqSpecified |= getReqNTIDz(F, Reqntidz);
+
+  if (ReqSpecified)
+    O << ".reqntid " << Reqntidx << ", " << Reqntidy << ", " << Reqntidz
       << "\n";
 
   // If the NVVM IR has some of maxntid* specified, then output
   // the maxntid directive, and set the unspecified ones to 1.
   // If none of maxntid* is specified, don't output maxntid directive.
-  unsigned maxntidx, maxntidy, maxntidz;
-  specified = false;
-  if (!getMaxNTIDx(F, maxntidx))
-    maxntidx = 1;
-  else
-    specified = true;
-  if (!getMaxNTIDy(F, maxntidy))
-    maxntidy = 1;
-  else
-    specified = true;
-  if (!getMaxNTIDz(F, maxntidz))
-    maxntidz = 1;
-  else
-    specified = true;
-
-  if (specified)
-    O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
+  unsigned Maxntidx, Maxntidy, Maxntidz;
+  Maxntidx = Maxntidy = Maxntidz = 1;
+  bool MaxSpecified = false;
+  MaxSpecified |= getMaxNTIDx(F, Maxntidx);
+  MaxSpecified |= getMaxNTIDy(F, Maxntidy);
+  MaxSpecified |= getMaxNTIDz(F, Maxntidz);
+
+  if (MaxSpecified)
+    O << ".maxntid " << Maxntidx << ", " << Maxntidy << ", " << Maxntidz
       << "\n";
 
-  unsigned mincta;
-  if (getMinCTASm(F, mincta))
-    O << ".minnctapersm " << mincta << "\n";
+  unsigned Mincta = 0;
+  if (getMinCTASm(F, Mincta))
+    O << ".minnctapersm " << Mincta << "\n";
 
-  unsigned maxnreg;
-  if (getMaxNReg(F, maxnreg))
-    O << ".maxnreg " << maxnreg << "\n";
+  unsigned Maxnreg = 0;
+  if (getMaxNReg(F, Maxnreg))
+    O << ".maxnreg " << Maxnreg << "\n";
+
+  unsigned Maxclusterrank = 0;
----------------
Artem-B wrote:

Do we want to ignore this directive if the metadata exists, but we're targeting 
a pre-sm_90 GPU? 

It may be useful for non-clang LLVM users (e.g XLA) to be able to always 
specify launch bounds metadata, and let LLVM decide on what it can do with it. 
Generating the directive for older GPUs would result in ptxas error, while 
ignoring it would still allow the kernels to compile and work, the same as 
would be the case if the metadata was correctly absent. I don't think there's 
not much point to require users to jump through more hoops just to achieve 
exactly the same result.


https://github.com/llvm/llvm-project/pull/66496
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to