On Fri, Dec 14, 2018 at 12:27 AM Ilia Mirkin <imir...@alum.mit.edu> wrote: > > On Thu, Dec 13, 2018 at 6:14 PM Karol Herbst <kher...@redhat.com> wrote: > > > > gives me an performance boost of 0.2% in pixmark_piano on my gk106, gm204 > > and > > gp107. > > > > changes in shader-db: > > > > total instructions in shared programs : 7614782 -> 7614782 (0.00%) > > total cvt instructions in shared programs : 139343 -> 95856 (-31.21%) > > total gprs used in shared programs : 798045 -> 798045 (0.00%) > > total shared used in shared programs : 639636 -> 639636 (0.00%) > > total local used in shared programs : 24648 -> 24648 (0.00%) > > total bytes used in shared programs : 81330696 -> 81330696 (0.00%) > > > > local shared gpr inst cvts bytes > > helped 0 0 0 0 14037 0 > > hurt 0 0 0 0 0 0 > > > > Signed-off-by: Karol Herbst <kher...@redhat.com> > > --- > > .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 55 +++++++++++++++++++ > > .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 1 + > > 2 files changed, 56 insertions(+) > > > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > index 295497be2f9..267fef0898d 100644 > > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp > > @@ -719,6 +719,58 @@ NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) > > bb->remove(bb->getEntry()); > > } > > > > +// replaces instructions which would end up as f2f or i2i with faster > > +// alternatives: > > +// - abs(a) -> add(0, abs a) > > +// - neg(a) -> add(0, neg a) > > +// - neg(abs a) -> add(0, neg abs a) > > +// - sat(a) -> sat add(0, a) > > +void > > +NVC0LegalizePostRA::replaceCvt(Instruction *cvt) > > +{ > > + if (typeSizeof(cvt->sType) == 8) > > != 4 > > Otherwise you're just asking for trouble. > > > + return; > > + if (cvt->sType != cvt->dType) > > + return; > > + // we could make it work, but in this case we have optimizations > > disabled > > + // and we don't really care either way. > > + if (cvt->src(0).getFile() == FILE_IMMEDIATE) > > + return; > > + > > + switch (cvt->op) { > > + case OP_ABS: > > + if (cvt->src(0).mod) > > + break; > > + if (!isFloatType(cvt->sType)) > > Why? What's wrong with IABS -> IADD? Similar question for OP_NEG. >
iadd can't have abs modifiers > > + break; > > + cvt->op = OP_ADD; > > + cvt->moveSources(0, 1); > > These two lines, along with moveSources are always repeated. Would be > nice to centralize this somehow. yeah.. just didn't figure out a super nice way to do that except checking twice for valid opcodes :/ > > > + cvt->src(1).mod = NV50_IR_MOD_ABS; > > + cvt->setSrc(0, rZero); > > + cvt->src(0).mod = 0; > > + break; > > + case OP_NEG: > > + if (cvt->src(0).mod && (cvt->src(0).mod.neg() || > > !isFloatType(cvt->sType))) > > + break; > > + cvt->op = OP_ADD; > > + cvt->moveSources(0, 1); > > + cvt->src(1).mod = cvt->src(0).mod ? NV50_IR_MOD_NEG_ABS : > > NV50_IR_MOD_NEG; > > + cvt->setSrc(0, rZero); > > + cvt->src(0).mod = 0; > > + break; > > + case OP_SAT: > > + if (cvt->src(0).mod) > > + break; > > + cvt->op = OP_ADD; > > + cvt->moveSources(0, 1); > > + cvt->setSrc(0, rZero); > > + cvt->saturate = true; > > + break; > > + default: > > + break; > > + } > > +} > > + > > bool > > NVC0LegalizePostRA::visit(BasicBlock *bb) > > { > > @@ -758,6 +810,9 @@ NVC0LegalizePostRA::visit(BasicBlock *bb) > > next = hi; > > } > > > > + if (i->isCvt()) > > + replaceCvt(i); > > + > > if (i->op != OP_MOV && i->op != OP_PFETCH) > > replaceZero(i); > > } > > diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > > b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > > index e0f50ab0904..4679c56471b 100644 > > --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > > +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h > > @@ -81,6 +81,7 @@ private: > > virtual bool visit(Function *); > > virtual bool visit(BasicBlock *); > > > > + void replaceCvt(Instruction *); > > void replaceZero(Instruction *); > > bool tryReplaceContWithBra(BasicBlock *); > > void propagateJoin(BasicBlock *); > > -- > > 2.19.2 > > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev