Basically we just have to scale up the coordinates and then add the
relevant sample offset. The code to handle this was already largely
present from Christoph's earlier attempts to pipe images through back in
the dark ages, this just hooks it all up.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

v1 -> v2:
 - fix sample count result
 - rework handleSUQ to properly handle the tex mask
 - add clamping of indirect address

 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |  3 +
 .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp      | 80 +++++++++++++---------
 src/gallium/drivers/nouveau/nvc0/nvc0_compute.c    | 24 +++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h    |  2 +-
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    | 20 +++---
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c     | 20 ++++++
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c        |  4 +-
 7 files changed, 106 insertions(+), 47 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 0fa5aa1..f3d7bee 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -2388,6 +2388,9 @@ Converter::getImageCoords(std::vector<Value *> &coords, 
int r, int s)
 
    for (int c = 0; c < arg; ++c)
       coords.push_back(fetchSrc(s, c));
+
+   if (t.isMS())
+      coords.push_back(fetchSrc(s, 3));
 }
 
 // For raw loads, granularity is 4 byte.
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 67bd73b..0558ae0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1569,44 +1569,51 @@ static inline uint16_t getSuClampSubOp(const 
TexInstruction *su, int c)
 bool
 NVC0LoweringPass::handleSUQ(TexInstruction *suq)
 {
+   int mask = suq->tex.mask;
    int dim = suq->tex.target.getDim();
    int arg = dim + (suq->tex.target.isArray() || suq->tex.target.isCube());
-   uint8_t s = prog->driver->io.auxCBSlot;
    Value *ind = suq->getIndirectR();
    uint32_t base;
-   int c;
-
-   base = prog->driver->io.suInfoBase + suq->tex.r * NVE4_SU_INFO__STRIDE;
+   int c, d;
 
-   if (ind)
-      ind = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getScratch(),
+   if (ind) {
+      ind = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       ind, bld.mkImm(suq->tex.r));
+      ind = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(),
+                       ind, bld.mkImm(7));
+      ind = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
                        ind, bld.mkImm(6));
+      base = 0;
+   } else {
+      base = suq->tex.r * NVE4_SU_INFO__STRIDE;
+   }
 
-   for (c = 0; c < arg; ++c) {
-      if (suq->defExists(c)) {
-         int offset;
+   for (c = 0, d = 0; c < 3; ++c, mask >>= 1) {
+      if (c >= arg || !(mask & 1))
+         continue;
 
-         if (c == 1 && suq->tex.target == TEX_TARGET_1D_ARRAY) {
-            offset = base + NVE4_SU_INFO_SIZE(2);
-         } else {
-            offset = base + NVE4_SU_INFO_SIZE(c);
-         }
-         bld.mkLoad(TYPE_U32, suq->getDef(c),
-                    bld.mkSymbol(FILE_MEMORY_CONST, s, TYPE_U32, offset), ind);
-      }
-   }
+      int offset;
 
-   if (suq->tex.target.isCube()) {
-      if (suq->defExists(2)) {
-         bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(2), suq->getDef(2),
-                   bld.loadImm(NULL, 6));
+      if (c == 1 && suq->tex.target == TEX_TARGET_1D_ARRAY) {
+         offset = NVE4_SU_INFO_SIZE(2);
+      } else {
+         offset = NVE4_SU_INFO_SIZE(c);
       }
+      bld.mkMov(suq->getDef(d++), loadSuInfo32(ind, base + offset));
+      if (c == 2 && suq->tex.target.isCube())
+         bld.mkOp2(OP_DIV, TYPE_U32, suq->getDef(d - 1), suq->getDef(d - 1),
+                   bld.loadImm(NULL, 6));
    }
 
-   if (suq->defExists(3)) {
-      // .w contains the number of samples for multi-sampled images but we
-      // don't support them for now.
-      bld.mkMov(suq->getDef(3), bld.loadImm(NULL, 1));
+   if (mask & 1) {
+      if (suq->tex.target.isMS()) {
+         Value *ms_x = loadSuInfo32(ind, base + NVE4_SU_INFO_MS(0));
+         Value *ms_y = loadSuInfo32(ind, base + NVE4_SU_INFO_MS(1));
+         Value *ms = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getScratch(), ms_x, 
ms_y);
+         bld.mkOp2(OP_SHL, TYPE_U32, suq->getDef(d++), bld.loadImm(NULL, 1), 
ms);
+      } else {
+         bld.mkMov(suq->getDef(d++), bld.loadImm(NULL, 1));
+      }
    }
 
    bld.remove(suq);
@@ -1616,7 +1623,7 @@ NVC0LoweringPass::handleSUQ(TexInstruction *suq)
 void
 NVC0LoweringPass::adjustCoordinatesMS(TexInstruction *tex)
 {
-   const uint16_t base = tex->tex.r * NVE4_SU_INFO__STRIDE;
+   uint16_t base;
    const int arg = tex->tex.target.getArgCount();
 
    if (tex->tex.target == TEX_TARGET_2D_MS)
@@ -1632,13 +1639,18 @@ NVC0LoweringPass::adjustCoordinatesMS(TexInstruction 
*tex)
    Value *s = tex->getSrc(arg - 1);
 
    Value *tx = bld.getSSA(), *ty = bld.getSSA(), *ts = bld.getSSA();
-   Value *ind = NULL;
+   Value *ind = tex->getIndirectR();
 
-   if (tex->tex.rIndirectSrc >= 0) {
-      assert(tex->tex.r == 0);
-      // FIXME: out of bounds
+   if (ind) {
+      ind = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(),
+                       ind, bld.mkImm(tex->tex.r));
+      ind = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(),
+                       ind, bld.mkImm(7));
       ind = bld.mkOp2v(OP_SHL, TYPE_U32, bld.getSSA(),
-                       tex->getIndirectR(), bld.mkImm(6));
+                       ind, bld.mkImm(6));
+      base = 0;
+   } else {
+      base = tex->tex.r * NVE4_SU_INFO__STRIDE;
    }
 
    Value *ms_x = loadSuInfo32(ind, base + NVE4_SU_INFO_MS(0));
@@ -2044,6 +2056,10 @@ 
NVC0LoweringPass::processSurfaceCoordsNVC0(TexInstruction *su)
    Value *v;
    Value *ind = NULL;
 
+   bld.setPosition(su, false);
+
+   adjustCoordinatesMS(su);
+
    if (su->tex.rIndirectSrc >= 0) {
       ind = su->getIndirectR();
       if (su->tex.r > 0) {
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
index 7511819..f5f7fd4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -113,6 +113,30 @@ nvc0_screen_compute_setup(struct nvc0_screen *screen,
    PUSH_DATA (push, screen->txc->offset + 65536);
    PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
 
+   /* MS sample coordinate offsets */
+   BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
+   PUSH_DATA (push, 2048);
+   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
+   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(5));
+   BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 2 * 8);
+   PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
+   PUSH_DATA (push, 0); /* 0 */
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 1); /* 1 */
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 0); /* 2 */
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, 1); /* 3 */
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, 2); /* 4 */
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 3); /* 5 */
+   PUSH_DATA (push, 0);
+   PUSH_DATA (push, 2); /* 6 */
+   PUSH_DATA (push, 1);
+   PUSH_DATA (push, 3); /* 7 */
+   PUSH_DATA (push, 1);
+
    return 0;
 }
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h 
b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
index 8a965fc..c633ccf 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -112,7 +112,7 @@
 #define NVC0_CB_AUX_TEX_INFO(i)     0x020 + (i) * 4
 #define NVC0_CB_AUX_TEX_SIZE        (32 * 4)
 /* 8 sets of 32-bits coordinate offsets */
-#define NVC0_CB_AUX_MS_INFO         0x0a0 /* CP */
+#define NVC0_CB_AUX_MS_INFO         0x0a0
 #define NVC0_CB_AUX_MS_SIZE         (8 * 2 * 4)
 /* block/grid size, at 3 32-bits integers each and gridid */
 #define NVC0_CB_AUX_GRID_INFO       0x0e0 /* CP */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index aba9511..d75b702 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -555,29 +555,25 @@ nvc0_program_translate(struct nvc0_program *prog, 
uint16_t chipset,
 
    info->io.genUserClip = prog->vp.num_ucps;
    info->io.auxCBSlot = 15;
+   info->io.msInfoCBSlot = 15;
    info->io.ucpBase = NVC0_CB_AUX_UCP_INFO;
    info->io.drawInfoBase = NVC0_CB_AUX_DRAW_INFO;
+   info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
+   info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
+   info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
+   if (chipset >= NVISA_GK104_CHIPSET) {
+      info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
+   }
 
    if (prog->type == PIPE_SHADER_COMPUTE) {
       if (chipset >= NVISA_GK104_CHIPSET) {
          info->io.auxCBSlot = 7;
-         info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
+         info->io.msInfoCBSlot = 7;
          info->prop.cp.gridInfoBase = NVC0_CB_AUX_GRID_INFO;
          info->io.uboInfoBase = NVC0_CB_AUX_UBO_INFO(0);
       }
-      info->io.msInfoCBSlot = 0;
-      info->io.msInfoBase = NVC0_CB_AUX_MS_INFO;
-      info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
-      info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
    } else {
-      if (chipset >= NVISA_GK104_CHIPSET) {
-         info->io.texBindBase = NVC0_CB_AUX_TEX_INFO(0);
-      }
       info->io.sampleInfoBase = NVC0_CB_AUX_SAMPLE_INFO;
-      info->io.bufInfoBase = NVC0_CB_AUX_BUF_INFO(0);
-      info->io.suInfoBase = NVC0_CB_AUX_SU_INFO(0);
-      info->io.msInfoCBSlot = 15;
-      info->io.msInfoBase = 0; /* TODO */
    }
 
    info->assignSlots = nvc0_program_assign_varying_slots;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index ce6bd8b..2a36d42 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -977,6 +977,26 @@ nvc0_screen_create(struct nouveau_device *dev)
          BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
          PUSH_DATA (push, 0x54);
       }
+
+      /* MS sample coordinate offsets: these do not work with _ALT modes ! */
+      BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * 8);
+      PUSH_DATA (push, NVC0_CB_AUX_MS_INFO);
+      PUSH_DATA (push, 0); /* 0 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 1); /* 1 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 0); /* 2 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 1); /* 3 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 2); /* 4 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 3); /* 5 */
+      PUSH_DATA (push, 0);
+      PUSH_DATA (push, 2); /* 6 */
+      PUSH_DATA (push, 1);
+      PUSH_DATA (push, 3); /* 7 */
+      PUSH_DATA (push, 1);
    }
    BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
    PUSH_DATA (push, 0);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index baf39aa..22d9ed8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -932,11 +932,11 @@ nve4_set_surface_info(struct nouveau_pushbuf *push,
       address += lvl->offset;
 
       info[0]  = address >> 8;
-      info[2]  = width - 1;
+      info[2]  = (width << mt->ms_x) - 1;
       /* NOTE: this is really important: */
       info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
       info[3]  = (0x88 << 24) | (lvl->pitch / 64);
-      info[4]  = height - 1;
+      info[4]  = (height << mt->ms_y) - 1;
       info[4] |= (lvl->tile_mode & 0x0f0) << 25;
       info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
       info[5]  = mt->layer_stride >> 8;
-- 
2.7.3

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to