arsenm created this revision.
arsenm added reviewers: yaxunl, gregrodgers.
Herald added subscribers: kerbowa, tpr, nhaehnle, wdng, jvesely.

Apparently HIPToolChain does not subclass from AMDGPUToolChain, so
this was not applying the new denormal attributes. I'm not sure why
this doesn't subclass. Just copy the implementation for now.


https://reviews.llvm.org/D76862

Files:
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/lib/Driver/ToolChains/HIP.h
  clang/test/Driver/cuda-flush-denormals-to-zero.cu

Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu
===================================================================
--- clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -7,6 +7,16 @@
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fcuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=sm_70 -fno-cuda-flush-denormals-to-zero -nocudainc -nocudalib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
+// Test explicit argument.
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fcuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -fno-cuda-flush-denormals-to-zero -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
+// Test the default changing with no argument based on the subtarget.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
+
 // CPUFTZ-NOT: -fdenormal-fp-math
 
 // FTZ-NOT: -fdenormal-fp-math-f32=
Index: clang/lib/Driver/ToolChains/HIP.h
===================================================================
--- clang/lib/Driver/ToolChains/HIP.h
+++ clang/lib/Driver/ToolChains/HIP.h
@@ -115,6 +115,11 @@
 
   unsigned GetDefaultDwarfVersion() const override { return 4; }
 
+  llvm::DenormalMode getDefaultDenormalModeForType(
+    const llvm::opt::ArgList &DriverArgs,
+    Action::OffloadKind DeviceOffloadKind,
+    const llvm::fltSemantics *FPType = nullptr) const override;
+
   const ToolChain &HostTC;
 
 protected:
Index: clang/lib/Driver/ToolChains/HIP.cpp
===================================================================
--- clang/lib/Driver/ToolChains/HIP.cpp
+++ clang/lib/Driver/ToolChains/HIP.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "HIP.h"
+#include "AMDGPU.h"
 #include "CommonArgs.h"
 #include "InputInfo.h"
 #include "clang/Basic/Cuda.h"
@@ -16,6 +17,7 @@
 #include "clang/Driver/Options.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/TargetParser.h"
 
 using namespace clang::driver;
 using namespace clang::driver::toolchains;
@@ -272,6 +274,34 @@
   getProgramPaths().push_back(getDriver().Dir);
 }
 
+// FIXME: Duplicated in AMDGPUToolChain
+llvm::DenormalMode HIPToolChain::getDefaultDenormalModeForType(
+    const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
+    const llvm::fltSemantics *FPType) const {
+  // Denormals should always be enabled for f16 and f64.
+  if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
+    return llvm::DenormalMode::getIEEE();
+
+  if (DeviceOffloadKind == Action::OFK_Cuda) {
+    if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
+        DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
+                           options::OPT_fno_cuda_flush_denormals_to_zero,
+                           false))
+      return llvm::DenormalMode::getPreserveSign();
+  }
+
+  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
+  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
+
+  // TODO: There are way too many flags that change this. Do we need to check
+  // them all?
+  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
+    !AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(Kind);
+  // Outputs are flushed to zero, preserving sign
+  return DAZ ? llvm::DenormalMode::getPreserveSign() :
+               llvm::DenormalMode::getIEEE();
+}
+
 void HIPToolChain::addClangTargetOptions(
     const llvm::opt::ArgList &DriverArgs,
     llvm::opt::ArgStringList &CC1Args,
Index: clang/lib/Driver/ToolChains/AMDGPU.h
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.h
+++ clang/lib/Driver/ToolChains/AMDGPU.h
@@ -13,6 +13,8 @@
 #include "clang/Driver/Options.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
+#include "llvm/Support/TargetParser.h"
+
 #include <map>
 
 namespace clang {
@@ -67,6 +69,10 @@
                              llvm::opt::ArgStringList &CC1Args,
                              Action::OffloadKind DeviceOffloadKind) const override;
 
+  /// Return whether denormals should be flushed, and treated as 0 by default
+  /// for the subtarget.
+  static bool getDefaultDenormsAreZeroForTarget(llvm::AMDGPU::GPUKind GPUKind);
+
   llvm::DenormalMode getDefaultDenormalModeForType(
       const llvm::opt::ArgList &DriverArgs,
       Action::OffloadKind DeviceOffloadKind,
Index: clang/lib/Driver/ToolChains/AMDGPU.cpp
===================================================================
--- clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -103,6 +103,19 @@
   return DAL;
 }
 
+bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
+    llvm::AMDGPU::GPUKind Kind) {
+  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
+
+  // Default to enabling f32 denormals by default on subtargets where fma is
+  // fast with denormals
+  const bool DefaultDenormsAreZeroForTarget =
+      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
+      (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
+
+  return DefaultDenormsAreZeroForTarget;
+}
+
 llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
     const llvm::opt::ArgList &DriverArgs, Action::OffloadKind DeviceOffloadKind,
     const llvm::fltSemantics *FPType) const {
@@ -121,18 +134,10 @@
   const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
   auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
 
-  // Default to enabling f32 denormals by default on subtargets where fma is
-  // fast with denormals
-
-  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
-  const bool DefaultDenormsAreZeroForTarget =
-    (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
-    (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
-
   // TODO: There are way too many flags that change this. Do we need to check
   // them all?
   bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
-             !DefaultDenormsAreZeroForTarget;
+             !getDefaultDenormsAreZeroForTarget(Kind);
   // Outputs are flushed to zero, preserving sign
   return DAZ ? llvm::DenormalMode::getPreserveSign() :
                llvm::DenormalMode::getIEEE();
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to