On Sun, Apr 17, 2016 at 4:07 PM, Pierre Moreau <pierre.mor...@free.fr> wrote: > Ping :-) > > On 10:56 PM - Mar 19 2016, Pierre Moreau wrote: >> Generating a `cvt u32 $r0 u64 $r1d` or a `cvt u64 $r0d u32 $r2` makes the GPU >> unhappy. Instead, manually handle the conversion between 64-bit and 32-bit >> values, and use `cvt` to convert between the original target (resp. source) >> and 32-bit value. This happens to be the behaviour of NVIDIA's driver. >> >> Signed-off-by: Pierre Moreau <pierre.mor...@free.fr> >> --- >> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 59 >> ++++++++++++++++++++++ >> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + >> 2 files changed, 60 insertions(+) >> >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> index 2719f2c..c419a68 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp >> @@ -1859,6 +1859,63 @@ NVC0LoweringPass::handleOUT(Instruction *i) >> return true; >> } >> >> +bool >> +NVC0LoweringPass::handleCVT(Instruction *i) >> +{ >> + if (isFloatType(i->dType) || isFloatType(i->sType) || >> + isSignedIntType(i->dType) xor isSignedIntType(i->sType))
I know pre-C89 features are cool, but let's avoid using them. I know characters like ^ were uncommon on 1960s and 1970s teletypes, but I think we're past those days now. >> + return false; >> + >> + if (typeSizeof(i->sType) == 8) { >> + Value *toSplit = i->getSrc(0); >> + if (i->saturate) { >> + Value *minValue = bld.loadImm(bld.getSSA(8), 0ul); >> + Value *maxValue = bld.loadImm(bld.getSSA(8), UINT32_MAX); >> + if (isSignedType(i->sType)) { >> + minValue = bld.loadImm(bld.getSSA(8), INT32_MIN); >> + maxValue = bld.loadImm(bld.getSSA(8), INT32_MAX); >> + } >> + Value *minRes = bld.mkOp2v(OP_MAX, i->sType, bld.getSSA(8), >> toSplit, >> + minValue); >> + toSplit = bld.mkOp2v(OP_MIN, i->sType, bld.getSSA(8), minRes, >> + maxValue); Aren't you assuming that typeSizeof(i->dType) == 4 here? It could be an unsigned <-> signed conversion at 64 bits, so the clamp values would be different. Handling ALL the cases is quite annoying... can you figure out what the hardware doesn't support and just handle that? I doubt it'll be any slower, and definitely simpler. -ilia >> + } >> + >> + Value *value32[2] = { bld.getSSA(), bld.getSSA() }; >> + bld.mkSplit(value32, 4, toSplit); >> + if (typeSizeof(i->dType) == 4) { >> + bld.mkMov(i->getDef(0), value32[0], i->dType); >> + delete_Instruction(prog, i); >> + return true; >> + } >> + >> + i->setSrc(0, bld.getSSA()); >> + i->sType = isSignedIntType(i->dType) ? TYPE_S32 : TYPE_U32; >> + bld.mkMov(i->getSrc(0), value32[0], i->sType); >> + } else if (typeSizeof(i->dType) == 8) { >> + bld.setPosition(i, true); >> + Value *res = i->getDef(0); >> + Value *high32 = bld.loadImm(bld.getSSA(), >> + isSignedType(i->sType) ? UINT32_MAX : 0u); >> + Value *low32 = i->getSrc(0); >> + DataType resType = i->dType; >> + >> + if (typeSizeof(i->sType) <= 2) { >> + i->dType = isSignedIntType(i->dType) ? 
TYPE_S32 : TYPE_U32; >> + low32 = bld.getSSA(); >> + i->setDef(0, low32); >> + } else if (typeSizeof(i->sType) == 4) { >> + delete_Instruction(prog, i); >> + } >> + >> + Value *merged64 = bld.mkOp2v(OP_MERGE, resType, bld.getSSA(8), low32, >> + high32); >> + bld.mkMov(res, merged64, resType); >> + } >> + >> + return true; >> +} >> + >> // Generate a binary predicate if an instruction is predicated by >> // e.g. an f32 value. >> void >> @@ -1894,6 +1951,8 @@ NVC0LoweringPass::visit(Instruction *i) >> checkPredicate(i); >> >> switch (i->op) { >> + case OP_CVT: >> + return handleCVT(i); >> case OP_TEX: >> case OP_TXB: >> case OP_TXL: >> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h >> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h >> index 6eb8aff..9fc24d9 100644 >> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h >> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h >> @@ -96,6 +96,7 @@ protected: >> bool handleMOD(Instruction *); >> bool handleSQRT(Instruction *); >> bool handlePOW(Instruction *); >> + bool handleCVT(Instruction *); >> bool handleTEX(TexInstruction *); >> bool handleTXD(TexInstruction *); >> bool handleTXQ(TexInstruction *); >> -- >> 2.7.4 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev