From: Pan Xiuli <xiuli....@intel.com> If we get the intel_reqd_sub_group_size attribute from the frontend, then set it in the backend. V2: Refine codeGenNum with a runtime calculation and fail the build if the size from the frontend is illegal.
Signed-off-by: Pan Xiuli <xiuli....@intel.com> --- backend/src/backend/context.cpp | 6 +----- backend/src/backend/gen_program.cpp | 28 ++++++++++++++++++++-------- backend/src/llvm/llvm_gen_backend.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp index e9ddd17..c9500c8 100644 --- a/backend/src/backend/context.cpp +++ b/backend/src/backend/context.cpp @@ -340,7 +340,6 @@ namespace gbe /////////////////////////////////////////////////////////////////////////// // Generic Context (shared by the simulator and the HW context) /////////////////////////////////////////////////////////////////////////// - IVAR(OCL_SIMD_WIDTH, 8, 15, 16); Context::Context(const ir::Unit &unit, const std::string &name) : unit(unit), fn(*unit.getFunction(name)), name(name), liveness(NULL), dag(NULL), useDWLabel(false) @@ -361,10 +360,7 @@ namespace gbe } void Context::startNewCG(uint32_t simdWidth) { - if (simdWidth == 0 || OCL_SIMD_WIDTH != 15) - this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH); - else - this->simdWidth = simdWidth; + this->simdWidth = simdWidth; GBE_SAFE_DELETE(this->registerAllocator); GBE_SAFE_DELETE(this->scratchAllocator); GBE_ASSERT(dag != NULL && liveness != NULL); diff --git a/backend/src/backend/gen_program.cpp b/backend/src/backend/gen_program.cpp index 383f2f2..cfb23fe 100644 --- a/backend/src/backend/gen_program.cpp +++ b/backend/src/backend/gen_program.cpp @@ -59,6 +59,7 @@ #include <clang/CodeGen/CodeGenAction.h> #endif +#include "sys/cvar.hpp" #include <cstring> #include <sstream> #include <memory> @@ -138,17 +139,24 @@ namespace gbe { } /*! 
We must avoid spilling at all cost with Gen */ - static const struct CodeGenStrategy { + struct CodeGenStrategy { uint32_t simdWidth; uint32_t reservedSpillRegs; bool limitRegisterPressure; - } codeGenStrategy[] = { + }; + static const struct CodeGenStrategy codeGenStrategyDefault[] = { {16, 0, false}, {8, 0, false}, {8, 8, false}, {8, 16, false}, }; + static const struct CodeGenStrategy codeGenStrategySimd16[] = { + {16, 0, false}, + {16, 8, false}, + {16, 16, false}, + }; + IVAR(OCL_SIMD_WIDTH, 8, 15, 16); Kernel *GenProgram::compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath, int profiling) { #ifdef GBE_COMPILER_AVAILABLE @@ -156,19 +164,23 @@ namespace gbe { // when the function already provides the simd width we need to use (i.e. // non zero) const ir::Function *fn = unit.getFunction(name); + const struct CodeGenStrategy* codeGenStrategy = codeGenStrategyDefault; if(fn == NULL) GBE_ASSERT(0); - uint32_t codeGenNum = sizeof(codeGenStrategy) / sizeof(codeGenStrategy[0]); + uint32_t codeGenNum = sizeof(codeGenStrategyDefault) / sizeof(codeGenStrategyDefault[0]); uint32_t codeGen = 0; GenContext *ctx = NULL; - if (fn->getSimdWidth() == 8) { + if ( fn->getSimdWidth() != 0 && OCL_SIMD_WIDTH != 15) { + GBE_ASSERTM(0, "unsupported SIMD width!"); + }else if (fn->getSimdWidth() == 8 || OCL_SIMD_WIDTH == 8) { codeGen = 1; - } else if (fn->getSimdWidth() == 16) { - codeGenNum = 1; - } else if (fn->getSimdWidth() == 0) { + } else if (fn->getSimdWidth() == 16 || OCL_SIMD_WIDTH == 16){ + codeGenStrategy = codeGenStrategySimd16; + codeGenNum = sizeof(codeGenStrategySimd16) / sizeof(codeGenStrategySimd16[0]); + } else if (fn->getSimdWidth() == 0 && OCL_SIMD_WIDTH == 15) { codeGen = 0; } else - GBE_ASSERT(0); + GBE_ASSERTM(0, "unsupported SIMD width!"); Kernel *kernel = NULL; // Stop when compilation is successful diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp index 31b8bf2..96c81b9 100644 --- 
a/backend/src/llvm/llvm_gen_backend.cpp +++ b/backend/src/llvm/llvm_gen_backend.cpp @@ -2124,6 +2124,7 @@ namespace gbe // Loop over the kernel metadatas to set the required work group size. size_t reqd_wg_sz[3] = {0, 0, 0}; size_t hint_wg_sz[3] = {0, 0, 0}; + size_t reqd_sg_sz = 0; ir::FunctionArgument::InfoFromLLVM llvmInfo; MDNode *addrSpaceNode = NULL; MDNode *typeNameNode = NULL; @@ -2219,6 +2220,27 @@ namespace gbe functionAttributes += buffer; functionAttributes += " "; } + if ((attrNode = F.getMetadata("intel_reqd_sub_group_size"))) { + GBE_ASSERT(attrNode->getNumOperands() == 1); + ConstantInt *sz = mdconst::extract<ConstantInt>(attrNode->getOperand(0)); + GBE_ASSERT(sz); + reqd_sg_sz = sz->getZExtValue(); + if(!(reqd_sg_sz == 8 || reqd_sg_sz == 16)){ + F.getContext().emitError("Required sub group size is illegal!"); + ctx.getUnit().setValid(false); + return; + } + functionAttributes += "intel_reqd_sub_group_size"; + std::stringstream param; + char buffer[100] = {0}; + param << "("; + param << reqd_sg_sz; + param << ")"; + param >> buffer; + functionAttributes += buffer; + functionAttributes += " "; + } + #else /* First find the meta data belong to this function. */ MDNode *node = getKernelFunctionMetadata(&F); @@ -2344,6 +2366,8 @@ namespace gbe #endif /* LLVM 3.9 Function metadata */ ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0], reqd_wg_sz[1], reqd_wg_sz[2]); + if (reqd_sg_sz) + ctx.setSimdWidth(reqd_sg_sz); ctx.getFunction().setFunctionAttributes(functionAttributes); // Loop over the arguments and output registers for them -- 2.7.4 _______________________________________________ Beignet mailing list Beignet@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/beignet