[Mesa-dev] [PATCH 2/7] i965/fs: Use a stride of 1 and byte offsets for UBOs
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 16 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d2881b2..de5c17a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -175,7 +175,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * the redundant ones. */ fs_reg vec4_offset = vgrf(glsl_type::int_type); - bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3)); + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); int scale = 1; if (devinfo->gen == 4 && bld.dispatch_width() == 8) { @@ -207,7 +207,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, inst->mlen = 1 + bld.dispatch_width() / 8; } - bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale)); + bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale)); } /** @@ -2052,10 +2052,12 @@ fs_visitor::demote_pull_constants() /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { +fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D); +ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4)); VARYING_PULL_CONSTANT_LOAD(ibld, dst, brw_imm_ud(index), - *inst->src[i].reladdr, - pull_index); + indirect, + pull_index * 4); inst->src[i].reladdr = NULL; inst->src[i].stride = 1; } else { @@ -3092,13 +3094,11 @@ fs_visitor::lower_uniform_pull_constant_loads() continue; if (devinfo->gen >= 7) { - /* The offset arg before was a vec4-aligned byte offset. We need to - * turn it into a dword offset. - */ + /* The offset arg is a vec4-aligned immediate byte offset.
*/ fs_reg const_offset_reg = inst->src[1]; assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); - const_offset_reg.ud /= 4; + assert(const_offset_reg.ud % 16 == 0); fs_reg payload, offset; if (devinfo->gen >= 9) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 9b50e4e..39bbef4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2363,16 +2363,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } if (has_indirect) { - /* Turn the byte offset into a dword offset. */ - fs_reg base_offset = vgrf(glsl_type::int_type); - bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), - BRW_REGISTER_TYPE_D), - brw_imm_d(2)); + fs_reg base_offset = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_D); - unsigned vec4_offset = instr->const_index[0] / 4; + unsigned vec4_offset = instr->const_index[0]; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, - base_offset, vec4_offset + i); + base_offset, vec4_offset + i * 4); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 2c56995..52bddae 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -404,7 +404,7 @@ brw_create_constant_surface(struct brw_context *brw, uint32_t *out_offset, bool dword_pitch) { - uint32_t stride = dword_pitch ? 4 : 16; + uint32_t stride = dword_pitch ? 1 : 16; uint32_t elements = ALIGN(size, stride) / stride; brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset, -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/7] i965/fs: Use a stride of 1 and byte offsets for UBOs
On Mon, Nov 23, 2015 at 6:11 PM, Jason Ekstrand wrote: > --- > src/mesa/drivers/dri/i965/brw_fs.cpp | 16 > src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 --- > src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- > 3 files changed, 13 insertions(+), 16 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp > b/src/mesa/drivers/dri/i965/brw_fs.cpp > index 777cee5..9e2b1fa2 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > @@ -187,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder > &bld, > * the redundant ones. > */ > fs_reg vec4_offset = vgrf(glsl_type::int_type); > - bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3)); > + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); > > int scale = 1; > if (devinfo->gen == 4 && bld.dispatch_width() == 8) { > @@ -219,7 +219,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder > &bld, > inst->mlen = 1 + bld.dispatch_width() / 8; > } > > - bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale)); > + bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * > scale)); > } > > /** > @@ -1999,10 +1999,12 @@ fs_visitor::demote_pull_constants() > > /* Generate a pull load into dst. */ > if (inst->src[i].reladdr) { > +fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D); > +ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4)); > VARYING_PULL_CONSTANT_LOAD(ibld, dst, > brw_imm_ud(index), > - *inst->src[i].reladdr, > - pull_index); > + indirect, > + pull_index * 4); > inst->src[i].reladdr = NULL; > inst->src[i].stride = 1; > } else { > @@ -3038,13 +3040,11 @@ fs_visitor::lower_uniform_pull_constant_loads() > continue; > >if (devinfo->gen >= 7) { > - /* The offset arg before was a vec4-aligned byte offset. We need to > - * turn it into a dword offset. > - */ > + /* The offset arg is a vec4-aligned immediate byte offset.
*/ > fs_reg const_offset_reg = inst->src[1]; > assert(const_offset_reg.file == IMM && > const_offset_reg.type == BRW_REGISTER_TYPE_UD); > - const_offset_reg.ud /= 4; > + assert(const_offset_reg.ud % 16 == 0); > > fs_reg payload, offset; > if (devinfo->gen >= 9) { > diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > index c439da2..062ae08 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp > @@ -2343,16 +2343,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr >} > >if (has_indirect) { > - /* Turn the byte offset into a dword offset. */ > - fs_reg base_offset = vgrf(glsl_type::int_type); > - bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), > - BRW_REGISTER_TYPE_D), > - brw_imm_d(2)); > + fs_reg base_offset = retype(get_nir_src(instr->src[1]), > + BRW_REGISTER_TYPE_D); > > - unsigned vec4_offset = instr->const_index[0] / 4; > + unsigned vec4_offset = instr->const_index[0]; > for (int i = 0; i < instr->num_components; i++) > VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, > - base_offset, vec4_offset + i); > + base_offset, vec4_offset + i * 4); >} else { > fs_reg packed_consts = vgrf(glsl_type::float_type); > packed_consts.type = dest.type; > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index f88f8d5..7cb7dd5 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > @@ -403,7 +403,7 @@ brw_create_constant_surface(struct brw_context *brw, > uint32_t *out_offset, > bool dword_pitch) > { > - uint32_t stride = dword_pitch ? 4 : 16; > + uint32_t stride = dword_pitch ? 1 : 16; I've thought dword_pitch was a bad name for a long time, but this really seals it.
> uint32_t elements = ALIGN(size, stride) / stride; > > brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset, > -- ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/7] i965/fs: Use a stride of 1 and byte offsets for UBOs
--- src/mesa/drivers/dri/i965/brw_fs.cpp | 16 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 11 --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 777cee5..9e2b1fa2 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -187,7 +187,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, * the redundant ones. */ fs_reg vec4_offset = vgrf(glsl_type::int_type); - bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~3)); + bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf)); int scale = 1; if (devinfo->gen == 4 && bld.dispatch_width() == 8) { @@ -219,7 +219,7 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, inst->mlen = 1 + bld.dispatch_width() / 8; } - bld.MOV(dst, offset(vec4_result, bld, (const_offset & 3) * scale)); + bld.MOV(dst, offset(vec4_result, bld, ((const_offset & 0xf) / 4) * scale)); } /** @@ -1999,10 +1999,12 @@ fs_visitor::demote_pull_constants() /* Generate a pull load into dst. */ if (inst->src[i].reladdr) { +fs_reg indirect = ibld.vgrf(BRW_REGISTER_TYPE_D); +ibld.MUL(indirect, *inst->src[i].reladdr, brw_imm_d(4)); VARYING_PULL_CONSTANT_LOAD(ibld, dst, brw_imm_ud(index), - *inst->src[i].reladdr, - pull_index); + indirect, + pull_index * 4); inst->src[i].reladdr = NULL; inst->src[i].stride = 1; } else { @@ -3038,13 +3040,11 @@ fs_visitor::lower_uniform_pull_constant_loads() continue; if (devinfo->gen >= 7) { - /* The offset arg before was a vec4-aligned byte offset. We need to - * turn it into a dword offset. - */ + /* The offset arg is a vec4-aligned immediate byte offset.
*/ fs_reg const_offset_reg = inst->src[1]; assert(const_offset_reg.file == IMM && const_offset_reg.type == BRW_REGISTER_TYPE_UD); - const_offset_reg.ud /= 4; + assert(const_offset_reg.ud % 16 == 0); fs_reg payload, offset; if (devinfo->gen >= 9) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index c439da2..062ae08 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -2343,16 +2343,13 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr } if (has_indirect) { - /* Turn the byte offset into a dword offset. */ - fs_reg base_offset = vgrf(glsl_type::int_type); - bld.SHR(base_offset, retype(get_nir_src(instr->src[1]), - BRW_REGISTER_TYPE_D), - brw_imm_d(2)); + fs_reg base_offset = retype(get_nir_src(instr->src[1]), + BRW_REGISTER_TYPE_D); - unsigned vec4_offset = instr->const_index[0] / 4; + unsigned vec4_offset = instr->const_index[0]; for (int i = 0; i < instr->num_components; i++) VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index, - base_offset, vec4_offset + i); + base_offset, vec4_offset + i * 4); } else { fs_reg packed_consts = vgrf(glsl_type::float_type); packed_consts.type = dest.type; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index f88f8d5..7cb7dd5 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -403,7 +403,7 @@ brw_create_constant_surface(struct brw_context *brw, uint32_t *out_offset, bool dword_pitch) { - uint32_t stride = dword_pitch ? 4 : 16; + uint32_t stride = dword_pitch ? 1 : 16; uint32_t elements = ALIGN(size, stride) / stride; brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset, -- 2.5.0.400.gff86faf ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev