[Mesa-dev] [PATCH 2/7] i965/fs: Use a stride of 1 and byte offsets for UBOs

2015-12-07 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 16 ++++++++--------
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 ++++-------
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  2 +-
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d2881b2..de5c17a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -175,7 +175,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
 * the redundant ones.
 */
fs_reg vec4_offset = vgrf(glsl_type::int_type);
-   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3));
+   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
 
int scale = 1;
if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
@@ -207,7 +207,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
  inst->mlen = 1 + bld.dispatch_width() / 8;
}
 
-   bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
+   bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale));
 }
 
 /**
@@ -2052,10 +2052,12 @@ fs_visitor::demote_pull_constants()
 
  /* Generate a pull load into dst. */
  if (inst->src[i].reladdr) {
+fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D);
+ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4));
 VARYING_PULL_CONSTANT_LOAD(ibld, dst,
brw_imm_ud(index),
-   *inst->src[i].reladdr,
-   pull_index);
+   indirect,
+   pull_index * 4);
 inst->src[i].reladdr = NULL;
 inst->src[i].stride = 1;
  } else {
@@ -3092,13 +3094,11 @@ fs_visitor::lower_uniform_pull_constant_loads()
  continue;
 
   if (devinfo->gen >= 7) {
- /* The offset arg before was a vec4-aligned byte offset.  We need to
-  * turn it into a dword offset.
-  */
+ /* The offset arg is a vec4-aligned immediate byte offset. */
  fs_reg const_offset_reg = inst->src[1];
  assert(const_offset_reg.file == IMM &&
 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
- const_offset_reg.ud /= 4;
+ assert(const_offset_reg.ud % 16 == 0);
 
  fs_reg payload, offset;
  if (devinfo->gen >= 9) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 9b50e4e..39bbef4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2363,16 +2363,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
   }
 
   if (has_indirect) {
- /* Turn the byte offset into a dword offset. */
- fs_reg base_offset = vgrf(glsl_type::int_type);
- bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
- BRW_REGISTER_TYPE_D),
- brw_imm_d(2));
+ fs_reg base_offset = retype(get_nir_src(instr->src[1]),
+ BRW_REGISTER_TYPE_D);
 
- unsigned vec4_offset = instr->const_index[0] / 4;
+ unsigned vec4_offset = instr->const_index[0];
  for (int i = 0; i < instr->num_components; i++)
 VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
-   base_offset, vec4_offset + i);
+   base_offset, vec4_offset + i * 4);
   } else {
  fs_reg packed_consts = vgrf(glsl_type::float_type);
  packed_consts.type = dest.type;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 2c56995..52bddae 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -404,7 +404,7 @@ brw_create_constant_surface(struct brw_context *brw,
uint32_t *out_offset,
 bool dword_pitch)
 {
-   uint32_t stride = dword_pitch ? 4 : 16;
+   uint32_t stride = dword_pitch ? 1 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
 
brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] i965/fs: Use a stride of 1 and byte offsets for UBOs

2015-11-23 Thread Matt Turner
On Mon, Nov 23, 2015 at 6:11 PM, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 16 ++++++++--------
>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 ++++-------
>  src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  2 +-
>  3 files changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 777cee5..9e2b1fa2 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -187,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
>  * the redundant ones.
>  */
> fs_reg vec4_offset = vgrf(glsl_type::int_type);
> -   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3));
> +   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
>
> int scale = 1;
> if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
> @@ -219,7 +219,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
>   inst->mlen = 1 + bld.dispatch_width() / 8;
> }
>
> -   bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
> +   bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale));
>  }
>
>  /**
> @@ -1999,10 +1999,12 @@ fs_visitor::demote_pull_constants()
>
>   /* Generate a pull load into dst. */
>   if (inst->src[i].reladdr) {
> +fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D);
> +ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4));
>  VARYING_PULL_CONSTANT_LOAD(ibld, dst,
> brw_imm_ud(index),
> -   *inst->src[i].reladdr,
> -   pull_index);
> +   indirect,
> +   pull_index * 4);
>  inst->src[i].reladdr = NULL;
>  inst->src[i].stride = 1;
>   } else {
> @@ -3038,13 +3040,11 @@ fs_visitor::lower_uniform_pull_constant_loads()
>   continue;
>
>if (devinfo->gen >= 7) {
> - /* The offset arg before was a vec4-aligned byte offset.  We need to
> -  * turn it into a dword offset.
> -  */
> + /* The offset arg is a vec4-aligned immediate byte offset. */
>   fs_reg const_offset_reg = inst->src[1];
>   assert(const_offset_reg.file == IMM &&
>  const_offset_reg.type == BRW_REGISTER_TYPE_UD);
> - const_offset_reg.ud /= 4;
> + assert(const_offset_reg.ud % 16 == 0);
>
>   fs_reg payload, offset;
>   if (devinfo->gen >= 9) {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> index c439da2..062ae08 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
> @@ -2343,16 +2343,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
>}
>
>if (has_indirect) {
> - /* Turn the byte offset into a dword offset. */
> - fs_reg base_offset = vgrf(glsl_type::int_type);
> - bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
> - BRW_REGISTER_TYPE_D),
> - brw_imm_d(2));
> + fs_reg base_offset = retype(get_nir_src(instr->src[1]),
> + BRW_REGISTER_TYPE_D);
>
> - unsigned vec4_offset = instr->const_index[0] / 4;
> + unsigned vec4_offset = instr->const_index[0];
>   for (int i = 0; i < instr->num_components; i++)
>  VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
> -   base_offset, vec4_offset + i);
> +   base_offset, vec4_offset + i * 4);
>} else {
>   fs_reg packed_consts = vgrf(glsl_type::float_type);
>   packed_consts.type = dest.type;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index f88f8d5..7cb7dd5 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -403,7 +403,7 @@ brw_create_constant_surface(struct brw_context *brw,
> uint32_t *out_offset,
>  bool dword_pitch)
>  {
> -   uint32_t stride = dword_pitch ? 4 : 16;
> +   uint32_t stride = dword_pitch ? 1 : 16;

I've thought dword_pitch was a bad name for a long time, but this
really seals it.

> uint32_t elements = ALIGN(size, stride) / stride;
>
> brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
> --
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] i965/fs: Use a stride of 1 and byte offsets for UBOs

2015-11-23 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 16 ++++++++--------
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 ++++-------
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  2 +-
 3 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 777cee5..9e2b1fa2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -187,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
 * the redundant ones.
 */
fs_reg vec4_offset = vgrf(glsl_type::int_type);
-   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3));
+   bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
 
int scale = 1;
if (devinfo->gen == 4 && bld.dispatch_width() == 8) {
@@ -219,7 +219,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld,
  inst->mlen = 1 + bld.dispatch_width() / 8;
}
 
-   bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale));
+   bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale));
 }
 
 /**
@@ -1999,10 +1999,12 @@ fs_visitor::demote_pull_constants()
 
  /* Generate a pull load into dst. */
  if (inst->src[i].reladdr) {
+fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D);
+ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4));
 VARYING_PULL_CONSTANT_LOAD(ibld, dst,
brw_imm_ud(index),
-   *inst->src[i].reladdr,
-   pull_index);
+   indirect,
+   pull_index * 4);
 inst->src[i].reladdr = NULL;
 inst->src[i].stride = 1;
  } else {
@@ -3038,13 +3040,11 @@ fs_visitor::lower_uniform_pull_constant_loads()
  continue;
 
   if (devinfo->gen >= 7) {
- /* The offset arg before was a vec4-aligned byte offset.  We need to
-  * turn it into a dword offset.
-  */
+ /* The offset arg is a vec4-aligned immediate byte offset. */
  fs_reg const_offset_reg = inst->src[1];
  assert(const_offset_reg.file == IMM &&
 const_offset_reg.type == BRW_REGISTER_TYPE_UD);
- const_offset_reg.ud /= 4;
+ assert(const_offset_reg.ud % 16 == 0);
 
  fs_reg payload, offset;
  if (devinfo->gen >= 9) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index c439da2..062ae08 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -2343,16 +2343,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
   }
 
   if (has_indirect) {
- /* Turn the byte offset into a dword offset. */
- fs_reg base_offset = vgrf(glsl_type::int_type);
- bld.SHR(base_offset, retype(get_nir_src(instr->src[1]),
- BRW_REGISTER_TYPE_D),
- brw_imm_d(2));
+ fs_reg base_offset = retype(get_nir_src(instr->src[1]),
+ BRW_REGISTER_TYPE_D);
 
- unsigned vec4_offset = instr->const_index[0] / 4;
+ unsigned vec4_offset = instr->const_index[0];
  for (int i = 0; i < instr->num_components; i++)
 VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
-   base_offset, vec4_offset + i);
+   base_offset, vec4_offset + i * 4);
   } else {
  fs_reg packed_consts = vgrf(glsl_type::float_type);
  packed_consts.type = dest.type;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index f88f8d5..7cb7dd5 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -403,7 +403,7 @@ brw_create_constant_surface(struct brw_context *brw,
uint32_t *out_offset,
 bool dword_pitch)
 {
-   uint32_t stride = dword_pitch ? 4 : 16;
+   uint32_t stride = dword_pitch ? 1 : 16;
uint32_t elements = ALIGN(size, stride) / stride;
 
brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev