v3: fix compiler warnings
v4: use loadFrom helper
v5: fix signed min/max
v6: set tex mask
    add support for indirect image access
    set cache mode

Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 395 +++++++++++++++++++-- 1 file changed, 375 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp index e15f1734cc3..5c3fde32601 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp @@ -91,6 +91,8 @@ private: LValues& convert(nir_register *); LValues& convert(nir_ssa_def *); + ImgFormat convertGLImgFormat(GLuint); + Value* getSrc(nir_alu_src *, uint8_t component = 0); Value* getSrc(nir_register *, uint8_t); Value* getSrc(nir_src *, uint8_t, bool indirect = false); @@ -120,6 +122,7 @@ private: DataType getDType(nir_alu_instr*); DataType getDType(nir_intrinsic_instr*); + DataType getDType(nir_intrinsic_instr*, bool isSigned); DataType getDType(nir_op, NirSSADefBitSize); std::vector<DataType> getSTypes(nir_alu_instr*); @@ -153,6 +156,11 @@ private: /* tex stuff */ Value* applyProjection(Value *src, Value *proj); + unsigned int getNIRArgCount(TexInstruction::Target&); + + /* image stuff */ + uint16_t derefImageVar(nir_deref_var *, Value **indirect); + CacheMode getCacheModeFromVar(nir_variable *); nir_shader *nir; @@ -244,11 +252,30 @@ Converter::getDType(nir_alu_instr *insn) DataType Converter::getDType(nir_intrinsic_instr *insn) +{ + bool isSigned; + switch (insn->intrinsic) { + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_imin: + isSigned = true; + break; + default: + isSigned = false; + break; + } + + return getDType(insn, isSigned); +} + +DataType +Converter::getDType(nir_intrinsic_instr *insn, bool isSigned) { if (insn->dest.is_ssa) - return typeOfSize(insn->dest.ssa.bit_size / 8, false, false); + return typeOfSize(insn->dest.ssa.bit_size / 8, false, isSigned); else - 
return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, false); + return typeOfSize(insn->dest.reg.reg->bit_size / 8, false, isSigned); } DataType @@ -445,28 +472,31 @@ Converter::getSubOp(nir_op op) } } +#define CASE_OP_INTR_ATOM(nir, nvir) \ + case nir_intrinsic_image_atomic_ ## nir : \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir +#define CASE_OP_INTR_ATOM_S(nir, nvir) \ + case nir_intrinsic_shared_atomic_ ## nir : \ + case nir_intrinsic_ssbo_atomic_ ## nir : \ + return NV50_IR_SUBOP_ATOM_ ## nvir int Converter::getSubOp(nir_intrinsic_op op) { switch (op) { - case nir_intrinsic_ssbo_atomic_add: - return NV50_IR_SUBOP_ATOM_ADD; - case nir_intrinsic_ssbo_atomic_and: - return NV50_IR_SUBOP_ATOM_AND; - case nir_intrinsic_ssbo_atomic_comp_swap: - return NV50_IR_SUBOP_ATOM_CAS; - case nir_intrinsic_ssbo_atomic_exchange: - return NV50_IR_SUBOP_ATOM_EXCH; - case nir_intrinsic_ssbo_atomic_or: - return NV50_IR_SUBOP_ATOM_OR; - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - return NV50_IR_SUBOP_ATOM_MAX; - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - return NV50_IR_SUBOP_ATOM_MIN; - case nir_intrinsic_ssbo_atomic_xor: - return NV50_IR_SUBOP_ATOM_XOR; + CASE_OP_INTR_ATOM(add, ADD); + CASE_OP_INTR_ATOM(and, AND); + CASE_OP_INTR_ATOM(comp_swap, CAS); + CASE_OP_INTR_ATOM(exchange, EXCH); + CASE_OP_INTR_ATOM(or, OR); + case nir_intrinsic_image_atomic_max: + CASE_OP_INTR_ATOM_S(imax, MAX); + CASE_OP_INTR_ATOM_S(umax, MAX); + case nir_intrinsic_image_atomic_min: + CASE_OP_INTR_ATOM_S(imin, MIN); + CASE_OP_INTR_ATOM_S(umin, MIN); + CASE_OP_INTR_ATOM(xor, XOR); case nir_intrinsic_vote_all: return NV50_IR_SUBOP_VOTE_ALL; case nir_intrinsic_vote_any: @@ -479,6 +509,8 @@ Converter::getSubOp(nir_intrinsic_op op) return 0; } } +#undef CASE_OP_INTR_ATOM +#undef CASE_OP_INTR_ATOM_S CondCode Converter::getCondCode(nir_op op) @@ -1628,6 
+1660,68 @@ Converter::convert(nir_intrinsic_op intr) } } +ImgFormat +Converter::convertGLImgFormat(GLuint format) +{ +#define FMT_CASE(a, b) \ + case GL_ ## a: return nv50_ir::FMT_ ## b + + switch (format) { + FMT_CASE(NONE, NONE); + + FMT_CASE(RGBA32F, RGBA32F); + FMT_CASE(RGBA16F, RGBA16F); + FMT_CASE(RG32F, RG32F); + FMT_CASE(RG16F, RG16F); + FMT_CASE(R11F_G11F_B10F, R11G11B10F); + FMT_CASE(R32F, R32F); + FMT_CASE(R16F, R16F); + + FMT_CASE(RGBA32UI, RGBA32UI); + FMT_CASE(RGBA16UI, RGBA16UI); + FMT_CASE(RGB10_A2UI, RGB10A2UI); + FMT_CASE(RGBA8UI, RGBA8UI); + FMT_CASE(RG32UI, RG32UI); + FMT_CASE(RG16UI, RG16UI); + FMT_CASE(RG8UI, RG8UI); + FMT_CASE(R32UI, R32UI); + FMT_CASE(R16UI, R16UI); + FMT_CASE(R8UI, R8UI); + + FMT_CASE(RGBA32I, RGBA32I); + FMT_CASE(RGBA16I, RGBA16I); + FMT_CASE(RGBA8I, RGBA8I); + FMT_CASE(RG32I, RG32I); + FMT_CASE(RG16I, RG16I); + FMT_CASE(RG8I, RG8I); + FMT_CASE(R32I, R32I); + FMT_CASE(R16I, R16I); + FMT_CASE(R8I, R8I); + + FMT_CASE(RGBA16, RGBA16); + FMT_CASE(RGB10_A2, RGB10A2); + FMT_CASE(RGBA8, RGBA8); + FMT_CASE(RG16, RG16); + FMT_CASE(RG8, RG8); + FMT_CASE(R16, R16); + FMT_CASE(R8, R8); + + FMT_CASE(RGBA16_SNORM, RGBA16_SNORM); + FMT_CASE(RGBA8_SNORM, RGBA8_SNORM); + FMT_CASE(RG16_SNORM, RG16_SNORM); + FMT_CASE(RG8_SNORM, RG8_SNORM); + FMT_CASE(R16_SNORM, R16_SNORM); + FMT_CASE(R8_SNORM, R8_SNORM); + + FMT_CASE(BGRA_INTEGER, BGRA8); + default: + ERROR("unknown format %x\n", format); + assert(false); + return nv50_ir::FMT_NONE; + } +#undef FMT_CASE +} + bool Converter::visit(nir_intrinsic_instr *insn) { @@ -1966,6 +2060,28 @@ Converter::visit(nir_intrinsic_instr *insn) info->io.globalAccess |= 0x1; break; } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_comp_swap: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_imin: + case nir_intrinsic_shared_atomic_umax: + 
case nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_xor: { + const DataType dType = getDType(insn); + LValues &newDefs = convert(&insn->dest); + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[0], 0, &indirectOffset); + Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, dType, offset); + Instruction *atom = mkOp2(OP_ATOM, dType, newDefs[0], sym, getSrc(&insn->src[1], 0)); + if (op == nir_intrinsic_shared_atomic_comp_swap) + atom->setSrc(2, getSrc(&insn->src[2], 0)); + atom->setIndirect(0, 0, indirectOffset); + atom->subOp = getSubOp(op); + break; + } case nir_intrinsic_ssbo_atomic_add: case nir_intrinsic_ssbo_atomic_and: case nir_intrinsic_ssbo_atomic_comp_swap: @@ -1995,6 +2111,182 @@ Converter::visit(nir_intrinsic_instr *insn) info->io.globalAccess |= 0x2; break; } + case nir_intrinsic_image_store: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + TexInstruction::Target target = + convert((glsl_sampler_dim)type->sampler_dimensionality, + type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, dummy; + unsigned int argCount = getNIRArgCount(target); + auto location = tex->data.driver_location; + Value *indirect = nullptr; + + srcs.resize(argCount + 4); + for (auto i = 0u; i < argCount; ++i) + srcs[i] = getSrc(&insn->src[0], i); + for (auto i = 0u; i < 4; ++i) + srcs[argCount + i] = getSrc(&insn->src[2], i); + + if (location == 0xffffffff) + location = derefImageVar(insn->variables[0], &indirect); + + TexInstruction *texi = mkTex(OP_SUSTP, target.getEnum(), location, 0, dummy, srcs); + texi->tex.format = + &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)]; + texi->tex.mask = 0xf; + texi->cache = getCacheModeFromVar(tex); + + if (indirect) + texi->setIndirectR(indirect); + + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_image_load: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = 
tex->type->without_array(); + TexInstruction::Target target = + convert((glsl_sampler_dim)type->sampler_dimensionality, + type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, defs; + uint32_t mask = 0; + LValues &newDefs = convert(&insn->dest); + unsigned int argCount = getNIRArgCount(target); + auto location = tex->data.driver_location; + Value *indirect = nullptr; + + defs.resize(newDefs.size()); + for (auto i = 0u; i < newDefs.size(); ++i) { + defs[i] = newDefs[i]; + mask |= 1 << i; + } + + srcs.resize(argCount); + for (auto i = 0u; i < argCount; ++i) + srcs[i] = getSrc(&insn->src[0], i); + + if (location == 0xffffffff) + location = derefImageVar(insn->variables[0], &indirect); + + TexInstruction *texi = mkTex(OP_SULDP, target.getEnum(), location, 0, defs, srcs); + texi->tex.format = + &nv50_ir::TexInstruction::formatTable[convertGLImgFormat(tex->data.image.format)]; + texi->tex.mask = mask; + texi->cache = getCacheModeFromVar(tex); + + if (indirect) + texi->setIndirectR(indirect); + + info->io.globalAccess |= 0x1; + break; + } + case nir_intrinsic_image_samples: + case nir_intrinsic_image_size: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + TexInstruction::Target target = + convert((glsl_sampler_dim)type->sampler_dimensionality, + type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, defs; + uint32_t mask = 0; + LValues &newDefs = convert(&insn->dest); + auto location = tex->data.driver_location; + Value *indirect = nullptr; + + defs.resize(newDefs.size()); + for (auto i = 0u; i < newDefs.size(); ++i) { + defs[i] = newDefs[i]; + mask |= 1 << i; + } + + if (location == 0xffffffff) + location = derefImageVar(insn->variables[0], &indirect); + + TexInstruction *texi = mkTex(OP_SUQ, target.getEnum(), location, 0, defs, srcs); + texi->tex.mask = mask; + + if (indirect) + texi->setIndirectR(indirect); + + break; + } + case nir_intrinsic_image_atomic_add: + case 
nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: { + nir_variable *tex = insn->variables[0]->var; + const glsl_type *type = tex->type->without_array(); + const DataType dType = getDType(insn, type->base_type == GLSL_TYPE_INT); + TexInstruction::Target target = + convert((glsl_sampler_dim)type->sampler_dimensionality, + type->sampler_array, type->sampler_shadow); + std::vector<Value*> srcs, defs; + LValues &newDefs = convert(&insn->dest); + unsigned int argCount = getNIRArgCount(target); + auto location = tex->data.driver_location; + Value *indirect = nullptr; + + defs.push_back(newDefs[0]); + + srcs.resize(argCount); + for (auto i = 0u; i < argCount; ++i) + srcs[i] = getSrc(&insn->src[0], i); + srcs.push_back(getSrc(&insn->src[2], 0)); + if (op == nir_intrinsic_image_atomic_comp_swap) + srcs.push_back(getSrc(&insn->src[3], 0)); + + if (location == 0xffffffff) + location = derefImageVar(insn->variables[0], &indirect); + + TexInstruction *texi = + mkTex(OP_SUREDP, target.getEnum(), location, 0, defs, srcs); + texi->tex.mask = 1; + texi->setType(dType); + texi->subOp = getSubOp(op); + + if (indirect) + texi->setIndirectR(indirect); + + info->io.globalAccess |= 0x2; + break; + } + case nir_intrinsic_store_shared: { + DataType sType = getSType(insn->src[0], false, false); + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[1], 0, &indirectOffset); + + for (auto i = 0u; i < insn->num_components; ++i) { + if (!((1u << i) & nir_intrinsic_write_mask(insn))) + continue; + Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType)); + mkStore(OP_STORE, sType, sym, indirectOffset, getSrc(&insn->src[0], i)); + } + break; + } + case nir_intrinsic_load_shared: { + const DataType dType = getDType(insn); + LValues 
&newDefs = convert(&insn->dest); + Value *indirectOffset; + uint32_t offset = getIndirect(&insn->src[0], 0, &indirectOffset); + + for (auto i = 0u; i < insn->num_components; ++i) + loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset); + + break; + } + case nir_intrinsic_barrier: { + // TODO: add flag to shader_info + info->numBarriers = 1; + Instruction *bar = mkOp2(OP_BAR, TYPE_U32, NULL, mkImm(0), mkImm(0)); + bar->fixed = 1; + bar->subOp = NV50_IR_SUBOP_BAR_SYNC; + break; + } default: ERROR("unknown nir_intrinsic_op %s\n", nir_intrinsic_infos[op].name); return false; @@ -2465,6 +2757,69 @@ Converter::applyProjection(Value *src, Value *proj) return mkOp2v(OP_MUL, TYPE_F32, getScratch(), src, proj); } +unsigned int +Converter::getNIRArgCount(TexInstruction::Target& target) +{ + unsigned int result = target.getArgCount(); + if (target.isCube() && target.isArray()) + return result - 1; + return result; +} + +uint16_t +Converter::derefImageVar(nir_deref_var *deref, Value **indirect) +{ + uint16_t idx = 0; + + if (indirect) + *indirect = loadImm(getSSA(), 0u); + + for (nir_deref *child = deref->deref.child; child; child = child->child) { + switch (child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(child); + auto size = type_size(child->type); + idx += size * arr->base_offset; + + switch (arr->deref_array_type) { + case nir_deref_array_type_direct: + break; + case nir_deref_array_type_indirect: { + Value *offset = + mkOp2v(OP_MUL, TYPE_U32, getSSA(), loadImm(getSSA(), size), + getSrc(&arr->indirect, 0, false)); + *indirect = mkOp2v(OP_ADD, TYPE_U32, getSSA(), *indirect, offset); + break; + } + case nir_deref_array_type_wildcard: + ERROR("Wildcard derefs not supported!!\n"); + assert(false); + break; + default: + ERROR("Unknown deref type!\n"); + assert(false); + break; + } + break; + } + default: + ERROR("Only array derefs supported yet!\n"); + assert(false); + } + } + return idx; +} + +CacheMode 
+Converter::getCacheModeFromVar(nir_variable *var) +{ + if (var->data.image._volatile) + return CACHE_CV; + if (var->data.image.coherent) + return CACHE_CG; + return CACHE_CA; +} + bool Converter::visit(nir_tex_instr *insn) { -- 2.14.3 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev