gives me a performance boost of 0.2% in pixmark_piano on my gk106, gm204 and gp107.
changes in shader-db: total instructions in shared programs : 7614782 -> 7614782 (0.00%) total cvt instructions in shared programs : 139343 -> 95856 (-31.21%) total gprs used in shared programs : 798045 -> 798045 (0.00%) total shared used in shared programs : 639636 -> 639636 (0.00%) total local used in shared programs : 24648 -> 24648 (0.00%) total bytes used in shared programs : 81330696 -> 81330696 (0.00%) local shared gpr inst cvts bytes helped 0 0 0 0 14037 0 hurt 0 0 0 0 0 0 v2: only for 32 bit operations move some common code out of the switch handle OP_SAT with modifiers Signed-off-by: Karol Herbst <kher...@redhat.com> --- .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 53 +++++++++++++++++++ .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + 2 files changed, 54 insertions(+) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp index 295497be2f9..24129ca4d64 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp @@ -719,6 +719,56 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) bb->remove(bb->getEntry()); } +// replaces instructions which would end up as f2f or i2i with faster +// alternatives: +// - abs(a) -> add(0, abs a) +// - neg(a) -> add(0, neg a) +// - neg(abs a) -> add(0, neg abs a) +// - sat(a) -> sat add(0, a) +void +NVC0LegalizePostRA::replaceCvt(Instruction *cvt) +{ + if (typeSizeof(cvt->sType) != 4) + return; + if (cvt->sType != cvt->dType) + return; + // we could make it work, but in this case we have optimizations disabled + // and we don't really care either way. 
+ if (cvt->src(0).getFile() == FILE_IMMEDIATE) + return; + + Modifier mod; + + switch (cvt->op) { + case OP_ABS: + if (cvt->src(0).mod) + return; + if (!isFloatType(cvt->sType)) + return; + mod = NV50_IR_MOD_ABS; + break; + case OP_NEG: + if (cvt->src(0).mod && (cvt->src(0).mod.neg() || !isFloatType(cvt->sType))) + return; + mod = cvt->src(0).mod ? NV50_IR_MOD_NEG_ABS : NV50_IR_MOD_NEG; + break; + case OP_SAT: + if (!isFloatType(cvt->sType) && cvt->src(0).mod.abs()) + return; + mod = cvt->src(0).mod; + cvt->saturate = true; + break; + default: + return; + } + + cvt->op = OP_ADD; + cvt->moveSources(0, 1); + cvt->setSrc(0, rZero); + cvt->src(0).mod = 0; + cvt->src(1).mod = mod; +} + bool NVC0LegalizePostRA::visit(BasicBlock *bb) { @@ -758,6 +808,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) next = hi; } + if (i->isCvt()) + replaceCvt(i); + if (i->op != OP_MOV && i->op != OP_PFETCH) replaceZero(i); } diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h index e0f50ab0904..4679c56471b 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h @@ -81,6 +81,7 @@ private: virtual bool visit(Function *); virtual bool visit(BasicBlock *); + void replaceCvt(Instruction *); void replaceZero(Instruction *); bool tryReplaceContWithBra(BasicBlock *); void propagateJoin(BasicBlock *); -- 2.19.2 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev