yaxunl created this revision. yaxunl added reviewers: arsenm, b-sumner, rjmccall. Herald added subscribers: kerbowa, t-tye, tpr, dstuttard, nhaehnle, wdng, jvesely, kzhuravl.
The AMDGPU backend needs to know whether IEEE754-2008 NaN compliant instructions need to be emitted for a function, which is conveyed by the function attribute "amdgpu-ieee". "amdgpu-ieee"="false" turns this off. Without this function attribute, the backend assumes it is on for compute functions. -mamdgpu-ieee and -mno-amdgpu-ieee are added to Clang to control this function attribute. By default it is on. If other options affecting NaN handling are set (e.g. -menable-no-nans, -cl-fast-relaxed-math, etc.), it is on if IEEE NaN compliant handling is required; otherwise it is off. If -mamdgpu-ieee or -mno-amdgpu-ieee is explicitly provided, the explicit value takes precedence. https://reviews.llvm.org/D77013 Files: clang/include/clang/Basic/CodeGenOptions.def clang/include/clang/Driver/Options.td clang/lib/CodeGen/TargetInfo.cpp clang/lib/Frontend/CompilerInvocation.cpp clang/test/CodeGenOpenCL/amdgpu-ieee.cl Index: clang/test/CodeGenOpenCL/amdgpu-ieee.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-ieee.cl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mamdgpu-ieee | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans -mamdgpu-ieee | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: 
-cl-fast-relaxed-math -mamdgpu-ieee \ +// RUN: | FileCheck -check-prefixes=COM,ON %s + +kernel void kern() { +// COM: define amdgpu_kernel void @kern() [[ATTRS:#[0-9]+]] +} + +// ON-NOT: attributes [[ATTRS]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS]] = {{.*}} "amdgpu-ieee"="false" Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1430,6 +1430,11 @@ std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); + + Opts.EmitIEEENaNCompliantInsts = + Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee, + !Opts.NoNaNsFPMath); + return Success; } Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -8512,6 +8512,9 @@ if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) + F->addFnAttr("amdgpu-ieee", "false"); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2402,6 +2402,11 @@ HelpText<"Generate additional code for specified <version> of debugger ABI (AMDGPU only)">, MetaVarName<"<version>">; +def mamdgpu_ieee : Flag<["-"], "mamdgpu-ieee">, Flags<[CC1Option]>, + Group<m_Group>, HelpText<"Enable IEEE754-2008 NaN compliance in supported AMDGPU instructions">; +def mno_amdgpu_ieee : Flag<["-"], "mno-amdgpu-ieee">, Flags<[CC1Option]>, + Group<m_Group>; + def mcode_object_v3 : Flag<["-"], "mcode-object-v3">, Group<m_amdgpu_Features_Group>, HelpText<"Enable code object v3 (AMDGPU 
only)">; def mno_code_object_v3 : Flag<["-"], "mno-code-object-v3">, Group<m_amdgpu_Features_Group>, Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -396,6 +396,9 @@ /// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) +/// Whether to emit IEEE754-2008 NaN compliant instructions if available (AMDGPU Only) +CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT
Index: clang/test/CodeGenOpenCL/amdgpu-ieee.cl =================================================================== --- /dev/null +++ clang/test/CodeGenOpenCL/amdgpu-ieee.cl @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mamdgpu-ieee | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -mno-amdgpu-ieee | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -menable-no-nans -mamdgpu-ieee | FileCheck -check-prefixes=COM,ON %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math | FileCheck -check-prefixes=COM,OFF %s +// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -O0 -emit-llvm -o - %s \ +// RUN: -cl-fast-relaxed-math -mamdgpu-ieee \ +// RUN: | FileCheck -check-prefixes=COM,ON %s + +kernel void kern() { +// COM: define amdgpu_kernel void @kern() [[ATTRS:#[0-9]+]] +} + +// ON-NOT: attributes [[ATTRS]] = {{.*}} "amdgpu-ieee" +// OFF: attributes [[ATTRS]] = {{.*}} "amdgpu-ieee"="false" Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1430,6 +1430,11 @@ std::string(Args.getLastArgValue(OPT_fsymbol_partition_EQ)); Opts.ForceAAPCSBitfieldLoad = Args.hasArg(OPT_ForceAAPCSBitfieldLoad); + + Opts.EmitIEEENaNCompliantInsts = + Args.hasFlag(options::OPT_mamdgpu_ieee, options::OPT_mno_amdgpu_ieee, + !Opts.NoNaNsFPMath); + return Success; } Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== 
--- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -8512,6 +8512,9 @@ if (NumVGPR != 0) F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR)); } + + if (!getABIInfo().getCodeGenOpts().EmitIEEENaNCompliantInsts) + F->addFnAttr("amdgpu-ieee", "false"); } unsigned AMDGPUTargetCodeGenInfo::getOpenCLKernelCallingConv() const { Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2402,6 +2402,11 @@ HelpText<"Generate additional code for specified <version> of debugger ABI (AMDGPU only)">, MetaVarName<"<version>">; +def mamdgpu_ieee : Flag<["-"], "mamdgpu-ieee">, Flags<[CC1Option]>, + Group<m_Group>, HelpText<"Enable IEEE754-2008 NaN compliance in supported AMDGPU instructions">; +def mno_amdgpu_ieee : Flag<["-"], "mno-amdgpu-ieee">, Flags<[CC1Option]>, + Group<m_Group>; + def mcode_object_v3 : Flag<["-"], "mcode-object-v3">, Group<m_amdgpu_Features_Group>, HelpText<"Enable code object v3 (AMDGPU only)">; def mno_code_object_v3 : Flag<["-"], "mno-code-object-v3">, Group<m_amdgpu_Features_Group>, Index: clang/include/clang/Basic/CodeGenOptions.def =================================================================== --- clang/include/clang/Basic/CodeGenOptions.def +++ clang/include/clang/Basic/CodeGenOptions.def @@ -396,6 +396,9 @@ /// Whether to not follow the AAPCS that enforce at least one read before storing to a volatile bitfield CODEGENOPT(ForceAAPCSBitfieldLoad, 1, 0) +/// Whether to emit IEEE754-2008 NaN compliant instructions if available (AMDGPU Only) +CODEGENOPT(EmitIEEENaNCompliantInsts, 1, 1) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits