nvc0/ir: add a lowering pass for surfaces on Fermi

Ilia Mirkin Sat, 14 May 2016 07:52:19 -0700

On Sat, May 14, 2016 at 9:54 AM, Samuel Pitoiset
<samuel.pitoi...@gmail.com> wrote:
> Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com>
> ---
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 117 +++++++++++++++++++++
>  .../nouveau/codegen/nv50_ir_lowering_nvc0.h        |   2 +
>  2 files changed, 119 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index 1068c21..002f09d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1982,6 +1982,121 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
>        su->sType = (su->tex.target == TEX_TARGET_BUFFER) ? TYPE_U32 : TYPE_U8;
>  }
>
> +void
> +NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
> +{
> +   const int idx = su->tex.r;
> +   const int dim = su->tex.target.getDim();
> +   const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
> +   const uint16_t base = idx * NVE4_SU_INFO__STRIDE;
> +   int c;
> +   Value *zero = bld.mkImm(0);
> +   Value *src[3];
> +   Value *v;
> +   Value *ind = NULL;
> +
> +   if (su->tex.rIndirectSrc >= 0) {
> +      // FIXME: out of bounds
> +      assert(su->tex.r == 0);
> +      ind = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
> +                       su->getIndirectR(), bld.mkImm(6));
> +   }
> +
> +   // get surface coordinates
> +   for (c = 0; c < arg; ++c)
> +      src[c] = su->getSrc(c);
> +   for (; c < 3; ++c)
> +      src[c] = zero;
> +
> +   // calculate pixel offset
> +   if (su->op == OP_SULDP || su->op == OP_SUREDP) {
> +      v = loadSuInfo32(ind, base + NVE4_SU_INFO_BSIZE);
> +      su->setSrc(0, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[0], v));
> +   }
> +
> +   // add array layer offset
> +   if (su->tex.target.isArray() || su->tex.target.isCube()) {
> +      v = loadSuInfo32(ind, base + NVE4_SU_INFO_ARRAY);
> +      assert(dim > 1);
> +      su->setSrc(2, bld.mkOp2v(OP_MUL, TYPE_U32, bld.getSSA(), src[2], v));
> +   }
> +
> +   // prevent read fault when the image is not actually bound
> +   CmpInstruction *pred =
> +      bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
> +                TYPE_U32, bld.mkImm(0),
> +                loadSuInfo32(ind, base + NVE4_SU_INFO_ADDR));
> +   if (su->tex.format) {
> +      const TexInstruction::ImgFormatDesc *format = su->tex.format;
> +      int blockwidth = format->bits[0] + format->bits[1] +
> +                       format->bits[2] + format->bits[3];
> +
> +      if (blockwidth >= 8) {


Why is the blockwidth so important here? Don't you just want to do
this for reads (since those use byte-type accesses) as well as for
atomics? I.e., do you need to do this for regular stores at all?

Even if you decide to stick with it, what you're really protecting
against here is a format of PIPE_FORMAT_NONE, which you should check
for explicitly here rather than creating an arbitrary 8-bit limit.

> +         // make sure that the format doesn't mismatch
> +         bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
> +                   TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
> +                   loadSuInfo32(ind, base + NVE4_SU_INFO_BSIZE),
> +                   pred->getDef(0));
> +      }
> +   }
> +   su->setPredicate(CC_NOT_P, pred->getDef(0));
> +}
> +
> +void
> +NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
> +{
> +   if (su->tex.target == TEX_TARGET_1D_ARRAY) {
> +      /* As 1d arrays also need 3 coordinates, switching to TEX_TARGET_2D_ARRAY
> +       * will simplify the lowering pass and the texture constraints. */
> +      su->moveSources(1, 1);
> +      su->setSrc(2, su->getSrc(1));

Is this line necessary? I thought that moveSources would take src(1)
and move it to src(2) [and so on].

> +      su->setSrc(1, bld.loadImm(NULL, 0));
> +      su->tex.target = TEX_TARGET_2D_ARRAY;
> +   }
> +
> +   processSurfaceCoordsNVC0(su);
> +
> +   if (su->op == OP_SULDP)
> +      convertSurfaceFormat(su);
> +
> +   if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
> +      const int dim = su->tex.target.getDim();
> +      const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
> +      LValue *addr = bld.getSSA(8);
> +      Value *def = su->getDef(0);
> +
> +      su->op = OP_SULEA;
> +
> +      // Set the destination to the address
> +      su->dType = TYPE_U64;
> +      su->setDef(0, addr);
> +      su->setDef(1, su->getPredicate());
> +
> +      bld.setPosition(su, true);
> +
> +      // Perform the atomic op
> +      Instruction *red = bld.mkOp(OP_ATOM, su->sType, bld.getSSA());
> +      red->subOp = su->subOp;
> +      red->setSrc(0, bld.mkSymbol(FILE_MEMORY_GLOBAL, 0, su->sType, 0));
> +      red->setSrc(1, su->getSrc(arg));
> +      if (red->subOp == NV50_IR_SUBOP_ATOM_CAS)
> +         red->setSrc(2, su->getSrc(arg + 1));
> +      red->setIndirect(0, 0, addr);
> +
> +      // make sure to initialize dst value when the atomic operation is not
> +      // performed
> +      Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
> +
> +      assert(su->cc == CC_NOT_P);
> +      red->setPredicate(su->cc, su->getPredicate());
> +      mov->setPredicate(CC_P, su->getPredicate());
> +
> +      bld.mkOp2(OP_UNION, TYPE_U32, def, red->getDef(0), mov->getDef(0));
> +
> +      handleCasExch(red, false);
> +   }
> +}
> +
>  bool
>  NVC0LoweringPass::handleWRSV(Instruction *i)
>  {
> @@ -2455,6 +2570,8 @@ NVC0LoweringPass::visit(Instruction *i)
>     case OP_SUREDP:
>        if (targ->getChipset() >= NVISA_GK104_CHIPSET)
>           handleSurfaceOpNVE4(i->asTex());
> +      else
> +         handleSurfaceOpNVC0(i->asTex());
>        break;
>     case OP_SUQ:
>        handleSUQ(i->asTex());
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> index c007e09..2321956 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
> @@ -106,6 +106,7 @@ protected:
>     bool handleATOM(Instruction *);
>     bool handleCasExch(Instruction *, bool needCctl);
>     void handleSurfaceOpNVE4(TexInstruction *);
> +   void handleSurfaceOpNVC0(TexInstruction *);
>     void handleSharedATOM(Instruction *);
>     void handleSharedATOMNVE4(Instruction *);
>     void handleLDST(Instruction *);
> @@ -138,6 +139,7 @@ private:
>
>     void adjustCoordinatesMS(TexInstruction *);
>     void processSurfaceCoordsNVE4(TexInstruction *);
> +   void processSurfaceCoordsNVC0(TexInstruction *);
>     void convertSurfaceFormat(TexInstruction *);
>
>  protected:
> --
> 2.8.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 6/8] nvc0/ir: add a lowering pass for surfaces on Fermi

Reply via email to