Re: [Mesa-dev] [PATCH] i965/fs: Implement SIMD16 dual source blending.

2014-09-18 Thread Iago Toral Quiroga
Hi Jason,

On mié, 2014-09-17 at 11:39 -0700, Jason Ekstrand wrote:
 I haven't tested this yet, just looked it over.  I've got a couple of
 inline comments below.  One general comment though: I'm currently
 working on a bunch of compiler stuff that reworks the way we do FB
 writes.  In particular, it reworks things to use GRF registers instead
 of the MRF.  It probably wouldn't be too bad for me to rebase on top
 of this or to rebase your patch on top of what I'm doing.  I just
 thought I'd warn you about the conflict.

Thanks for the warning, I suppose it should be easy for me to rebase
this patch on top of your work if you happen to land yours sooner.

 On Wed, Sep 17, 2014 at 4:36 AM, Iago Toral Quiroga
 ito...@igalia.com wrote:
 From the SNB PRM, volume 4, part 1, page 193:
 The dual source render target messages only have SIMD8 forms
 due to
 maximum message length limitations. SIMD16 pixel shaders must
 send two of
 these messages to cover all of the pixels. Each message
 contains two colors
 (4 channels each) for each pixel in the message payload.
 
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831
 ---
  src/mesa/drivers/dri/i965/brw_eu.h |  1 +
  src/mesa/drivers/dri/i965/brw_eu_emit.c|  3 +-
  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 +++--
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 41
 +++---
  4 files changed, 45 insertions(+), 14 deletions(-)
 
 I tested this on SandyBridge and IvyBridge. No piglit
 regressions in these
 platforms, but would be nice if someone could test this in
 later platforms too.
 
 I only noticed these two tests for dual source blending in
 piglit though:
 tests/spec/ext_framebuffer_multisample/alpha-to-one-dual-src-blend.cpp
 
 tests/spec/ext_framebuffer_multisample/alpha-to-coverage-dual-src-blend.cpp
 
 The first one fails, in both platforms with and without my
 patch. The second one
 passes in both platforms, with and without my patch.
 
 I also tested this with a separate test program to verify that
 it worked, at
 least, in a simple case.
 
 diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
 b/src/mesa/drivers/dri/i965/brw_eu.h
 index e6c26e3..5908ba5 100644
 --- a/src/mesa/drivers/dri/i965/brw_eu.h
 +++ b/src/mesa/drivers/dri/i965/brw_eu.h
 @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p,
unsigned msg_length,
unsigned response_length,
bool eot,
 +  bool last_render_target,
bool header_present);
 
  void brw_SAMPLE(struct brw_compile *p,
 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
 b/src/mesa/drivers/dri/i965/brw_eu_emit.c
 index 39f94e9..ffdbe6d 100644
 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
 +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
 @@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p,
unsigned msg_length,
unsigned response_length,
bool eot,
 +  bool last_render_target,
bool header_present)
  {
 struct brw_context *brw = p-brw;
 @@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 msg_type,
 msg_length,
 header_present,
 -   eot, /* last render target write
 */
 +   last_render_target,
 response_length,
 eot,
 0 /* send_commit_msg */);
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 index 1bc10f5..a4b84aa 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 @@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst
 *inst,
 
 if (inst-opcode == FS_OPCODE_REP_FB_WRITE)
msg_control =
 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
 -   else if (prog_data-dual_src_blend)
 -  msg_control =
 BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
 -   else if (dispatch_width == 16)
 +   else if (prog_data-dual_src_blend) {
 +  if (dispatch_width == 8 || !inst-eot)
 +   

Re: [Mesa-dev] [PATCH 17/20] i965: Make instruction lists local to the bblocks.

2014-09-18 Thread Pohjolainen, Topi
On Tue, Sep 02, 2014 at 09:34:28PM -0700, Matt Turner wrote:
 ---
  src/mesa/drivers/dri/i965/brw_cfg.cpp  | 62 -
  src/mesa/drivers/dri/i965/brw_cfg.h| 77 
 +-
  .../drivers/dri/i965/brw_dead_control_flow.cpp |  6 +-
  src/mesa/drivers/dri/i965/brw_fs.cpp   |  6 +-
  src/mesa/drivers/dri/i965/brw_fs_cse.cpp   |  6 --
  .../dri/i965/brw_fs_peephole_predicated_break.cpp  |  6 +-
  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 +-
  src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp  |  4 +-
  .../drivers/dri/i965/brw_schedule_instructions.cpp | 10 +--
  src/mesa/drivers/dri/i965/brw_shader.cpp   | 14 
  src/mesa/drivers/dri/i965/brw_vec4_cse.cpp |  6 --
  src/mesa/drivers/dri/i965/intel_asm_annotation.c   |  4 +-
  12 files changed, 114 insertions(+), 91 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
 b/src/mesa/drivers/dri/i965/brw_cfg.cpp
 index 8714b68..44e7744 100644
 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp

I really like this.

Indentation in this file looks weird, but that is because the current code
uses tabs and your changes use spaces.

Reviewed-by: Topi Pohjolainen topi.pohjolai...@intel.com

 @@ -54,9 +54,7 @@ bblock_t::bblock_t(cfg_t *cfg) :
 cfg(cfg), start_ip(0), end_ip(0), num(0),
 if_block(NULL), else_block(NULL)
  {
 -   start = NULL;
 -   end = NULL;
 -
 +   instructions.make_empty();
 parents.make_empty();
 children.make_empty();
  }
 @@ -119,8 +117,8 @@ bblock_t::can_combine_with(const bblock_t *that) const
 if ((const bblock_t *)this-link.next != that)
return false;
  
 -   if (ends_block(this-end) ||
 -   starts_block(that-start))
 +   if (ends_block(this-end()) ||
 +   starts_block(that-start()))
return false;
  
 return true;
 @@ -138,8 +136,8 @@ bblock_t::combine_with(bblock_t *that)
 }
  
 this-end_ip = that-end_ip;
 -   this-end = that-end;
 this-else_block = that-else_block;
 +   this-instructions.append_list(that-instructions);
  
 this-cfg-remove_block(that);
  }
 @@ -148,9 +146,7 @@ void
  bblock_t::dump(backend_visitor *v)
  {
 int ip = this-start_ip;
 -   for (backend_instruction *inst = (backend_instruction *)this-start;
 - inst != this-end-next;
 - inst = (backend_instruction *) inst-next) {
 +   foreach_inst_in_block(backend_instruction, inst, this) {
fprintf(stderr, %5d: , ip);
v-dump_instruction(inst);
ip++;
 @@ -178,16 +174,15 @@ cfg_t::cfg_t(exec_list *instructions)
  
 set_next_block(cur, entry, ip);
  
 -   entry-start = (backend_instruction *) instructions-get_head();
 -
 -   foreach_in_list(backend_instruction, inst, instructions) {
 -  cur-end = inst;
 -
 +   foreach_in_list_safe(backend_instruction, inst, instructions) {
/* set_next_block wants the post-incremented ip */
ip++;
  
switch (inst-opcode) {
case BRW_OPCODE_IF:
 + inst-remove();
 + cur-instructions.push_tail(inst);
 +
/* Push our information onto a stack so we can recover from
 * nested ifs.
 */
 @@ -202,44 +197,46 @@ cfg_t::cfg_t(exec_list *instructions)
 * instructions.
 */
next = new_block();
 -  next-start = (backend_instruction *)inst-next;
cur_if-add_successor(mem_ctx, next);
  
set_next_block(cur, next, ip);
break;
  
case BRW_OPCODE_ELSE:
 + inst-remove();
 + cur-instructions.push_tail(inst);
 +
   cur_else = cur;
  
next = new_block();
 -  next-start = (backend_instruction *)inst-next;
cur_if-add_successor(mem_ctx, next);
  
set_next_block(cur, next, ip);
break;
  
case BRW_OPCODE_ENDIF: {
 - if (cur-start == inst) {
 + if (cur-instructions.is_empty()) {
  /* New block was just created; use it. */
  cur_endif = cur;
   } else {
  cur_endif = new_block();
 -cur_endif-start = inst;
  
 -cur-end = (backend_instruction *)inst-prev;
  cur-add_successor(mem_ctx, cur_endif);
  
  set_next_block(cur, cur_endif, ip - 1);
   }
  
 + inst-remove();
 + cur-instructions.push_tail(inst);
 +
   if (cur_else) {
  cur_else-add_successor(mem_ctx, cur_endif);
   } else {
  cur_if-add_successor(mem_ctx, cur_endif);
   }
  
 - assert(cur_if-end-opcode == BRW_OPCODE_IF);
 - assert(!cur_else || cur_else-end-opcode == BRW_OPCODE_ELSE);
 + assert(cur_if-end()-opcode == BRW_OPCODE_IF);
 + assert(!cur_else || cur_else-end()-opcode == BRW_OPCODE_ELSE);
  
   cur_if-if_block = cur_if;
   cur_if-else_block = cur_else;
 @@ -269,25 +266,28 @@ cfg_t::cfg_t(exec_list *instructions)
 */
cur_while = new_block();
  
 

Re: [Mesa-dev] [PATCH 20/20] i965: Add and use functions to get next/prev blocks.

2014-09-18 Thread Pohjolainen, Topi
On Thu, Sep 04, 2014 at 01:26:45PM -0700, Matt Turner wrote:
 On Tue, Sep 2, 2014 at 9:34 PM, Matt Turner matts...@gmail.com wrote:
  diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp 
  b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
  index 557c3ad..8a7f42a 100644
  --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
  @@ -52,20 +52,20 @@ dead_control_flow_eliminate(backend_visitor *v)
continue;
 
 backend_instruction *if_inst = NULL, *else_inst = NULL;
  -  backend_instruction *prev_inst = ((bblock_t 
  *)endif_block-link.prev)-end();
  +  backend_instruction *prev_inst = endif_block-next()-end();
 
 This is obviously supposed to be ->prev(), not ->next(). Fixed locally.

With that, patches 19 and 20 are:

Reviewed-by: Topi Pohjolainen topi.pohjolai...@intel.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/37] i965/gen6/gs: Implement support for gl_PrimitiveIdIn.

2014-09-18 Thread Jordan Justen
On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 For this we will need to move PrimitiveID information, delivered in the thread
 payload in r0.1, to a separate register (we use GS_OPCODE_SET_PRIMITIVE_ID
 for this), then map the corresponding varying slot to that register in the
 setup_payload() method.

 Notice that we cannot use a virtual register as the destination for the
 PrimitiveID because we need to map all input attributes to hardware registers
 in setup_payload(), which happens before virtual registers are mapped to
 hardware registers. We could work around that issue if we were able to compute
 the first non-payload register in emit_prolog() and move the PrimitiveID
 information to that register, but we can't because at that point we still
 don't know the final number of uniforms that will be included in the payload.

 So, what we do is to place PrimitiveID information in r1, which is always
 delivered as part of the payload but it's only populated with data
 relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE
 in the 3DSTATE_GS state packet.

 When we implement transform feedback, we will make sure to move the value of r1
 to another register before we overwrite it with the PrimitiveID.
 ---
  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 69 
 ++-
  src/mesa/drivers/dri/i965/gen6_gs_visitor.h   |  2 +
  2 files changed, 70 insertions(+), 1 deletion(-)

 diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 index 4a440eb..b45c381 100644
 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 @@ -31,6 +31,8 @@

  #include gen6_gs_visitor.h

 +const unsigned MAX_GS_INPUT_VERTICES = 6;
 +
  namespace brw {

  void
 @@ -38,6 +40,7 @@ gen6_gs_visitor::emit_prolog()
  {
 vec4_gs_visitor::emit_prolog();

 +   this-current_annotation = gen6 prolog;
 /* Gen6 geometry shaders require to allocate an initial VUE handle via
  * FF_SYNC message, however the documentation remarks that only one thread
  * can write to the URB simultaneously and the FF_SYNC message provides 
 the
 @@ -59,7 +62,6 @@ gen6_gs_visitor::emit_prolog()
  * flags for the next vertex come right after the data items and flags for
  * the previous vertex.
  */
 -   this-current_annotation = gen6 prolog;

Seems like this belongs in
i965/gen6/gs: Add initial implementation for a gen6 geometry shader visitor.
(Or, perhaps just drop the change...)

Patches 21-26 (gs-support-snb-for-submission-02092014)
 i965/gen6/gs: Implement support for gl_PrimitiveIdIn.
 i965/gen6/gs: Assign geometry shader VUE map properly.
 i965/gen6/gs: Enable texture units and upload sampler state.
 i965/gen6/gs: implement GS_OPCODE_SVB_WRITE opcode
 i965/gen6/gs: implement GS_OPCODE_SVB_SET_DST_INDEX opcode
 i965/gen6/gs: implement GS_OPCODE_FF_SYNC_SET_PRIMITIVES opcode
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

 this-vertex_output = src_reg(this,
   glsl_type::uint_type,
   (prog_data-vue_map.num_slots + 1) *
 @@ -94,6 +96,30 @@ gen6_gs_visitor::emit_prolog()
  */
 this-prim_count = src_reg(this, glsl_type::uint_type);
 emit(MOV(dst_reg(this-prim_count), 0u));
 +
 +   /* PrimitiveID is delivered in r0.1 of the thread payload. If the program
 +* needs it we have to move it to a separate register where we can map
 +* the attribute.
 +*
 +* Notice that we cannot use a virtual register for this, because we need 
 to
 +* map all input attributes to hardware registers in setup_payload(),
 +* which happens before virtual registers are mapped to hardware 
 registers.
 +* We could work around that issue if we were able to compute the first
 +* non-payload register here and move the PrimitiveID information to that
 +* register, but we can't because at this point we don't know the final
 +* number uniforms that will be included in the payload.
 +*
 +* So, what we do is to place PrimitiveID information in r1, which is 
 always
 +* delivered as part of the payload, but its only populated with data
 +* relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE
 +* in the 3DSTATE_GS state packet. That information can be obtained by 
 other
 +* means though, so we can safely use r1 for this purpose.
 +*/
 +   if (c-prog_data.include_primitive_id) {
 +  this-primitive_id =
 + src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
 +  emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this-primitive_id));
 +   }
  }

  void
 @@ -410,4 +436,45 @@ gen6_gs_visitor::emit_thread_end()
 inst-mlen = 1;
  }

 +void
 +gen6_gs_visitor::setup_payload()
 +{
 +   int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
 +
 +   /* Attributes are going to be interleaved, so one register contains two
 +

Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-09-18 Thread Jordan Justen
On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 From: Samuel Iglesias Gonsalvez sigles...@igalia.com

 We will use this parameter in later patches to provide information relevant
 to transform feedback that needs to be set as part of the FF_SYNC message.

 Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
 ---
  src/mesa/drivers/dri/i965/brw_defines.h  |  4 
  src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
  src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
  4 files changed, 21 insertions(+), 5 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
 b/src/mesa/drivers/dri/i965/brw_defines.h
 index 6e8b998..b0d6d9f 100644
 --- a/src/mesa/drivers/dri/i965/brw_defines.h
 +++ b/src/mesa/drivers/dri/i965/brw_defines.h
 @@ -1030,6 +1030,10 @@ enum opcode {
  *   FF_SYNC operation.
  *
  * - src1 is the number of primitives written.
 +*
 +* - src2 is the value to hold in M0.0: number of SO vertices to write
 +*   and number of SO primitives needed. Its value will be overwritten
 +*   with the SVBI values if transform feedback is enabled.
  */
 GS_OPCODE_FF_SYNC,

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 763cb23..58a5aac 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -679,7 +679,8 @@ private:
 struct brw_reg src2);
 void generate_gs_ff_sync(struct brw_reg dst,
  struct brw_reg src0,
 -struct brw_reg src1);
 +struct brw_reg src1,
 +struct brw_reg src2);
 void generate_gs_set_primitive_id(struct brw_reg dst);
 void generate_oword_dual_block_offsets(struct brw_reg m1,
   struct brw_reg index);
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 index d4554f5..c69b305 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
 @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct 
 brw_reg dst,
  void
  vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
  struct brw_reg src0,
 -struct brw_reg src1)
 +struct brw_reg src1,
 +struct brw_reg src2)
  {
 /* We use dst to setup the ff_sync header, so we expect it to be
  * initialized to R0 by the caller. Here we overwrite dword 0 (cleared
 @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
 brw_push_insn_state(p);
 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 brw_set_default_access_mode(p, BRW_ALIGN_1);
 -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
 +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
 brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
 brw_set_default_access_mode(p, BRW_ALIGN_16);
 brw_pop_insn_state(p);
 @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
 brw_set_default_access_mode(p, BRW_ALIGN_1);
 brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
 +
 +   /* src2 is not an immediate when we use transform feedback */
 +   if (src2.file != BRW_IMMEDIATE_VALUE) {
 +  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
 +  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
 +  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
 +  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));

Ken and I discussed this a bit. Ken suggested that this:
brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));

Should be able to copy all 4 dwords in one instruction. What do you think?

By the way, this was for the version of this patch on the
gs-support-snb-for-submission-02092014 which has src1 as the
destination and dst as the source for the moves. (Hmm, not sure about
the src1 naming in this context...)

If that change seems good, then
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

 +   }
 +
 brw_set_default_access_mode(p, BRW_ALIGN_16);
 brw_pop_insn_state(p);
  }
 @@ -1374,7 +1384,7 @@ 
 vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
break;

 case GS_OPCODE_FF_SYNC:
 -  generate_gs_ff_sync(dst, src[0], src[1]);
 +  generate_gs_ff_sync(dst, src[0], src[1], src[2]);
break;

 case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
 diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 index 

[Mesa-dev] [PATCH v2] i965/fs: Implement SIMD16 dual source blending.

2014-09-18 Thread Iago Toral Quiroga
From the SNB PRM, volume 4, part 1, page 193:
The dual source render target messages only have SIMD8 forms due to
maximum message length limitations. SIMD16 pixel shaders must send two of
these messages to cover all of the pixels. Each message contains two colors
(4 channels each) for each pixel in the message payload.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831
---
 src/mesa/drivers/dri/i965/brw_eu.h |  1 +
 src/mesa/drivers/dri/i965/brw_eu_emit.c|  3 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 -
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 79 ++
 4 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index e6c26e3..5908ba5 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p,
   unsigned msg_length,
   unsigned response_length,
   bool eot,
+  bool last_render_target,
   bool header_present);
 
 void brw_SAMPLE(struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 39f94e9..ffdbe6d 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p,
   unsigned msg_length,
   unsigned response_length,
   bool eot,
+  bool last_render_target,
   bool header_present)
 {
struct brw_context *brw = p-brw;
@@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p,
msg_type,
msg_length,
header_present,
-   eot, /* last render target write */
+   last_render_target,
response_length,
eot,
0 /* send_commit_msg */);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 1bc10f5..a4b84aa 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst *inst,
 
if (inst-opcode == FS_OPCODE_REP_FB_WRITE)
   msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
-   else if (prog_data-dual_src_blend)
-  msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
-   else if (dispatch_width == 16)
+   else if (prog_data-dual_src_blend) {
+  if (dispatch_width == 8 || !inst-eot)
+ msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
+  else
+ msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
+   } else if (dispatch_width == 16)
   msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
else
   msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
@@ -131,6 +134,9 @@ fs_generator::fire_fb_write(fs_inst *inst,
uint32_t surf_index =
   prog_data-binding_table.render_target_start + inst-target;
 
+   bool last_render_target = inst-eot ||
+ (prog_data-dual_src_blend  dispatch_width == 
16);
+
brw_fb_WRITE(p,
 dispatch_width,
 base_reg,
@@ -140,6 +146,7 @@ fs_generator::fire_fb_write(fs_inst *inst,
 nr,
 0,
 inst-eot,
+last_render_target,
 inst-header_present);
 
brw_mark_surface_used(prog_data-base, surf_index);
@@ -254,6 +261,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst)
 inst-mlen,
 0,
 true,
+true,
 inst-header_present);
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 2d5318a..99d04c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3064,12 +3064,6 @@ fs_visitor::emit_fb_writes()
int reg_width = dispatch_width / 8;
bool src0_alpha_to_render_target = false;
 
-   if (do_dual_src) {
-  no16(GL_ARB_blend_func_extended not yet supported in SIMD16.);
-  if (dispatch_width == 16)
- do_dual_src = false;
-   }
-
/* From the Sandy Bridge PRM, volume 4, page 198:
 *
 * Dispatched Pixel Enables. One bit per pixel indicating
@@ -3109,11 +3103,22 @@ fs_visitor::emit_fb_writes()
   nr += 1;
}
 
-   /* Reserve space for color. It'll be filled in per MRT below. */
+   /* Reserve space for color. It'll be filled in per MRT below.
+*
+* From the SNB PRM, volume 4, part 1, page 

Re: [Mesa-dev] [PATCH] replace file specific compiler optimization with inline attribute

2014-09-18 Thread Marc Dietrich
Hello Matt,

Am Sonntag, 14. September 2014, 21:12:57 schrieb Matt Turner:
 On Fri, Sep 12, 2014 at 1:02 AM, Marc Dietrich marvi...@gmx.de wrote:
  Am Donnerstag, 11. September 2014, 08:52:39 schrieb Matt Turner:
  On Thu, Sep 11, 2014 at 6:58 AM, Marc Dietrich marvi...@gmx.de wrote:
   File specific optimization as used for
   src/mesa/main/streaming-load-memcpy.c currently will cause problems with
   LTO in the future
   (see: https://bugs.freedesktop.org/show_bug.cgi?id=83669). Replace it 
   with
   in-file target specification.
 
  This is only available in gcc-4.8 and later.
 
  no, it's there since 4.4 (4.8 has some runtime auto selection for c++ 
  AFAIK).
  See https://gcc.gnu.org/gcc-4.4/changes.html (C family).
 
 Oh, interesting. This sounds like a good plan then. Looks to me like
 gcc-4.3 is the only version that supports -msse4* and doesn't support
 this attribute, and I think everyone would be okay with requiring
 >=gcc-4.4 to compile i965_dri.so.
 
 Perhaps we could use this with our code using SSSE3 intrinsics as well.
 
 I'll investigate. Thanks for bringing this up and correcting me about
 gcc's support!
 
  I guess we need a fallback for compilers supporting -msse4.1 (icc only?) and
  not __attribute__ (target).
 
 Meh. Maybe it can inspire other compiler authors to maintain
 compatibility with gcc. But I'm predicting dissent..

Are you planning to commit this patch?

Marc


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 21/37] i965/gen6/gs: Implement support for gl_PrimitiveIdIn.

2014-09-18 Thread Iago Toral Quiroga
On jue, 2014-09-18 at 00:30 -0700, Jordan Justen wrote:
 On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote:
  For this we will need to move PrimitiveID information, delivered in the 
  thread
  payload in r0.1, to a separate register (we use GS_OPCODE_SET_PRIMITIVE_ID
  for this), then map the corresponding varying slot to that register in the
  setup_payload() method.
 
  Notice that we cannot use a virtual register as the destination for the
  PrimitiveID because we need to map all input attributes to hardware 
  registers
  in setup_payload(), which happens before virtual registers are mapped to
  hardware registers. We could work around that issue if we were able to 
  compute
  the first non-payload register in emit_prolog() and move the PrimitiveID
  information to that register, but we can't because at that point we still
  don't know the final number of uniforms that will be included in the payload.
 
  So, what we do is to place PrimitiveID information in r1, which is always
  delivered as part of the payload but it's only populated with data
  relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE
  in the 3DSTATE_GS state packet.
 
  When we implement transform feedback, we will make sure to move the value of 
  r1
  to another register before we overwrite it with the PrimitiveID.
  ---
   src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 69 
  ++-
   src/mesa/drivers/dri/i965/gen6_gs_visitor.h   |  2 +
   2 files changed, 70 insertions(+), 1 deletion(-)
 
  diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
  b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
  index 4a440eb..b45c381 100644
  --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
  +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
  @@ -31,6 +31,8 @@
 
   #include gen6_gs_visitor.h
 
  +const unsigned MAX_GS_INPUT_VERTICES = 6;
  +
   namespace brw {
 
   void
  @@ -38,6 +40,7 @@ gen6_gs_visitor::emit_prolog()
   {
  vec4_gs_visitor::emit_prolog();
 
  +   this-current_annotation = gen6 prolog;
  /* Gen6 geometry shaders require to allocate an initial VUE handle via
   * FF_SYNC message, however the documentation remarks that only one 
  thread
   * can write to the URB simultaneously and the FF_SYNC message provides 
  the
  @@ -59,7 +62,6 @@ gen6_gs_visitor::emit_prolog()
   * flags for the next vertex come right after the data items and flags 
  for
   * the previous vertex.
   */
  -   this-current_annotation = gen6 prolog;
 
 Seems like this belongs in
 i965/gen6/gs: Add initial implementation for a gen6 geometry shader visitor.
 (Or, perhaps just drop the change...)

You are right, I'll fix it.
Iago

 Patches 21-26 (gs-support-snb-for-submission-02092014)
  i965/gen6/gs: Implement support for gl_PrimitiveIdIn.
  i965/gen6/gs: Assign geometry shader VUE map properly.
  i965/gen6/gs: Enable texture units and upload sampler state.
  i965/gen6/gs: implement GS_OPCODE_SVB_WRITE opcode
  i965/gen6/gs: implement GS_OPCODE_SVB_SET_DST_INDEX opcode
  i965/gen6/gs: implement GS_OPCODE_FF_SYNC_SET_PRIMITIVES opcode
 Reviewed-by: Jordan Justen jordan.l.jus...@intel.com
 
  this-vertex_output = src_reg(this,
glsl_type::uint_type,
(prog_data-vue_map.num_slots + 1) *
  @@ -94,6 +96,30 @@ gen6_gs_visitor::emit_prolog()
   */
  this-prim_count = src_reg(this, glsl_type::uint_type);
  emit(MOV(dst_reg(this-prim_count), 0u));
  +
  +   /* PrimitiveID is delivered in r0.1 of the thread payload. If the program
  +* needs it we have to move it to a separate register where we can map
  +* the attribute.
  +*
  +* Notice that we cannot use a virtual register for this, because we 
  need to
  +* map all input attributes to hardware registers in setup_payload(),
  +* which happens before virtual registers are mapped to hardware 
  registers.
  +* We could work around that issue if we were able to compute the first
  +* non-payload register here and move the PrimitiveID information to 
  that
  +* register, but we can't because at this point we don't know the final
  +* number uniforms that will be included in the payload.
  +*
  +* So, what we do is to place PrimitiveID information in r1, which is 
  always
  +* delivered as part of the payload, but its only populated with data
  +* relevant for transform feedback when we set 
  GEN6_GS_SVBI_PAYLOAD_ENABLE
  +* in the 3DSTATE_GS state packet. That information can be obtained by 
  other
  +* means though, so we can safely use r1 for this purpose.
  +*/
  +   if (c-prog_data.include_primitive_id) {
  +  this-primitive_id =
  + src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
  +  emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this-primitive_id));
  +   }
   }
 
   void
  @@ -410,4 +436,45 @@ gen6_gs_visitor::emit_thread_end()
  

Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-09-18 Thread Iago Toral Quiroga
On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote:
 On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote:
  From: Samuel Iglesias Gonsalvez sigles...@igalia.com
 
  We will use this parameter in later patches to provide information relevant
  to transform feedback that needs to be set as part of the FF_SYNC message.
 
  Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
  ---
   src/mesa/drivers/dri/i965/brw_defines.h  |  4 
   src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
   src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
   src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
   4 files changed, 21 insertions(+), 5 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
  b/src/mesa/drivers/dri/i965/brw_defines.h
  index 6e8b998..b0d6d9f 100644
  --- a/src/mesa/drivers/dri/i965/brw_defines.h
  +++ b/src/mesa/drivers/dri/i965/brw_defines.h
  @@ -1030,6 +1030,10 @@ enum opcode {
   *   FF_SYNC operation.
   *
   * - src1 is the number of primitives written.
  +*
  +* - src2 is the value to hold in M0.0: number of SO vertices to write
  +*   and number of SO primitives needed. Its value will be overwritten
  +*   with the SVBI values if transform feedback is enabled.
   */
  GS_OPCODE_FF_SYNC,
 
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
  b/src/mesa/drivers/dri/i965/brw_vec4.h
  index 763cb23..58a5aac 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4.h
  +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
  @@ -679,7 +679,8 @@ private:
  struct brw_reg src2);
  void generate_gs_ff_sync(struct brw_reg dst,
   struct brw_reg src0,
  -struct brw_reg src1);
  +struct brw_reg src1,
  +struct brw_reg src2);
  void generate_gs_set_primitive_id(struct brw_reg dst);
  void generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index);
  diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
  b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  index d4554f5..c69b305 100644
  --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
  @@ -734,7 +734,8 @@ 
  vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
   void
   vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
   struct brw_reg src0,
  -struct brw_reg src1)
  +struct brw_reg src1,
  +struct brw_reg src2)
   {
  /* We use dst to setup the ff_sync header, so we expect it to be
   * initialized to R0 by the caller. Here we overwrite dword 0 (cleared
  @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
  brw_push_insn_state(p);
  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
  brw_set_default_access_mode(p, BRW_ALIGN_1);
  -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
  +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
  brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
  brw_set_default_access_mode(p, BRW_ALIGN_16);
  brw_pop_insn_state(p);
  @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
  brw_set_default_access_mode(p, BRW_ALIGN_1);
  brw_set_default_mask_control(p, BRW_MASK_DISABLE);
  brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
  +
  +   /* src2 is not an immediate when we use transform feedback */
  +   if (src2.file != BRW_IMMEDIATE_VALUE) {
  +  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
  +  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
  +  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
  +  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));
 
 Ken and I discussed this a bit. Ken suggested that this:
 brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
 
 Should be able to copy all 4 dwords in one instruction. What do you think?

Sure, if we can do this in just one MOV, that is better. I'll give it a
try.

 By the way, this was for the version of this patch on the
 gs-support-snb-for-submission-02092014 which has src1 as the
 destination and dst as the source for the moves. (Hmm, not sure about
 the src1 naming in this context...)

Yes, this is used as both a src and a dst... and I suppose Samuel
decided to follow naming conventions for other opcodes that have a dst
and multiple src parameters. I suppose the best way to do this would
have been to create a separate generator opcode for the part where this
is used as a destination register only...

Iago

 If that change seems good, then
 Reviewed-by: Jordan Justen 

Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.

2014-09-18 Thread Samuel Iglesias Gonsálvez
On Thu, 2014-09-18 at 10:39 +0200, Iago Toral Quiroga wrote:
 On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote:
  On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com 
  wrote:
   From: Samuel Iglesias Gonsalvez sigles...@igalia.com
  
   We will use this parameter in later patches to provide information 
   relevant
   to transform feedback that needs to be set as part of the FF_SYNC message.
  
   Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
   ---
src/mesa/drivers/dri/i965/brw_defines.h  |  4 
src/mesa/drivers/dri/i965/brw_vec4.h |  3 ++-
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +---
src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp|  3 ++-
4 files changed, 21 insertions(+), 5 deletions(-)
  
   diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
   b/src/mesa/drivers/dri/i965/brw_defines.h
   index 6e8b998..b0d6d9f 100644
   --- a/src/mesa/drivers/dri/i965/brw_defines.h
   +++ b/src/mesa/drivers/dri/i965/brw_defines.h
   @@ -1030,6 +1030,10 @@ enum opcode {
*   FF_SYNC operation.
*
* - src1 is the number of primitives written.
   +*
   +* - src2 is the value to hold in M0.0: number of SO vertices to write
   +*   and number of SO primitives needed. Its value will be overwritten
   +*   with the SVBI values if transform feedback is enabled.
*/
   GS_OPCODE_FF_SYNC,
  
   diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
   b/src/mesa/drivers/dri/i965/brw_vec4.h
   index 763cb23..58a5aac 100644
   --- a/src/mesa/drivers/dri/i965/brw_vec4.h
   +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
   @@ -679,7 +679,8 @@ private:
   struct brw_reg src2);
   void generate_gs_ff_sync(struct brw_reg dst,
struct brw_reg src0,
   -struct brw_reg src1);
   +struct brw_reg src1,
   +struct brw_reg src2);
   void generate_gs_set_primitive_id(struct brw_reg dst);
   void generate_oword_dual_block_offsets(struct brw_reg m1,
 struct brw_reg index);
   diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
   b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
   index d4554f5..c69b305 100644
   --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
   +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
   @@ -734,7 +734,8 @@ 
   vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
void
vec4_generator::generate_gs_ff_sync(struct brw_reg dst,
struct brw_reg src0,
   -struct brw_reg src1)
   +struct brw_reg src1,
   +struct brw_reg src2)
{
   /* We use dst to setup the ff_sync header, so we expect it to be
* initialized to R0 by the caller. Here we overwrite dword 0 (cleared
   @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg 
   dst,
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   -   brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0));
   +   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0));
   brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0));
   brw_set_default_access_mode(p, BRW_ALIGN_16);
   brw_pop_insn_state(p);
   @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg 
   dst,
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0));
   +
   +   /* src2 is not an immediate when we use transform feedback */
   +   if (src2.file != BRW_IMMEDIATE_VALUE) {
   +  brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1));
   +  brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2));
   +  brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3));
   +  brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4));
  
  Ken and I discussed this a bit. Ken suggested that this:
  brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
  
  Should be able to copy all 4 dwords in one instruction. What do you think?
 
 Sure, if we can do this in just one MOV, that is better. I'll give it a
 try.
 

Piglit shows no regressions with this change, it works like a charm!

We will add it to the commit.

Thanks,

Sam


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/11] glsl: Check realloc return value in ir_function::matching_signature()

2014-09-18 Thread Juha-Pekka Heikkila
On 10.09.2014 00:59, Anuj Phogat wrote:
 On Mon, Sep 8, 2014 at 11:53 PM, Juha-Pekka Heikkila
 juhapekka.heikk...@gmail.com wrote:
 Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
 ---
  src/glsl/ir_function.cpp | 11 +--
  1 file changed, 9 insertions(+), 2 deletions(-)

 diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
 index 98bec45..2b2643c 100644
 --- a/src/glsl/ir_function.cpp
 +++ b/src/glsl/ir_function.cpp
 @@ -24,6 +24,7 @@
  #include glsl_types.h
  #include ir.h
  #include glsl_parser_extras.h
 +#include main/errors.h

  typedef enum {
 PARAMETER_LIST_NO_MATCH,
 @@ -296,6 +297,7 @@ ir_function::matching_signature(_mesa_glsl_parse_state 
 *state,
  bool *is_exact)
  {
 ir_function_signature **inexact_matches = NULL;
 +   ir_function_signature **inexact_matches_temp;
 ir_function_signature *match = NULL;
 int num_inexact_matches = 0;

 @@ -321,11 +323,16 @@ ir_function::matching_signature(_mesa_glsl_parse_state 
 *state,
   free(inexact_matches);
   return sig;
case PARAMETER_LIST_INEXACT_MATCH:
 - inexact_matches = (ir_function_signature **)
 + inexact_matches_temp = (ir_function_signature **)
 realloc(inexact_matches,
 sizeof(*inexact_matches) *
 (num_inexact_matches + 1));
 - assert(inexact_matches);
 + if (inexact_matches_temp == NULL) {
 +_mesa_error_no_memory(__func__);
 +free(inexact_matches);
 This free is not required. inexact_matches is null.

Why is inexact_matches null? This reallocation is inside
foreach_in_list{..} and the number of inexact matches is counted with the
num_inexact_matches variable. If realloc does not return null on the
first run, inexact_matches would hold a valid pointer.

 +return NULL;
 + }
 + inexact_matches = inexact_matches_temp;
   inexact_matches[num_inexact_matches++] = sig;
   continue;
case PARAMETER_LIST_NO_MATCH:
 --
 1.8.5.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/fs: Implement SIMD16 dual source blending.

2014-09-18 Thread Iago Toral Quiroga
On jue, 2014-09-18 at 08:08 +0200, Iago Toral Quiroga wrote:
 Hi Jason,
 
 On mié, 2014-09-17 at 11:39 -0700, Jason Ekstrand wrote:
  I haven't tested this yet, just looked it over.  I've got a couple of
  inline comments below.  One general comment though: I'm currently
  working on a bunch of compiler stuff that reworks the way we do FB
  writes.  In particular, it reworks things to use GRF registers instead
  of the MRF.  It probably wouldn't be too bad for me to rebase on top
  of this or to rebase your patch on top of what I'm doing.  I just
  thought I'd warn you about the conflict.
 
 Thanks for the warning, I suppose it should be easy for me to rebase
 this patch on top of your work if you happen to land yours sooner.
 
  On Wed, Sep 17, 2014 at 4:36 AM, Iago Toral Quiroga
  ito...@igalia.com wrote:
  From the SNB PRM, volume 4, part 1, page 193:
  The dual source render target messages only have SIMD8 forms
  due to
  maximum message length limitations. SIMD16 pixel shaders must
  send two of
  these messages to cover all of the pixels. Each message
  contains two colors
  (4 channels each) for each pixel in the message payload.
  
  Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831
  ---
   src/mesa/drivers/dri/i965/brw_eu.h |  1 +
   src/mesa/drivers/dri/i965/brw_eu_emit.c|  3 +-
   src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 +++--
   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 41
  +++---
   4 files changed, 45 insertions(+), 14 deletions(-)
  
  I tested this on SandyBridge and IvyBridge. No piglit
  regressions in these
  platforms, but would be nice if someone could test this in
  later platforms too.
  
  I only noticed these two tests for dual source blending in
  piglit though:
  
  tests/spec/ext_framebuffer_multisample/alpha-to-one-dual-src-blend.cpp
  
  tests/spec/ext_framebuffer_multisample/alpha-to-coverage-dual-src-blend.cpp
  
  The first one fails, in both platforms with and without my
  patch. The second one
  passes in both platforms, with and without my patch.
  
  I also tested this with a separate test program to verify that
  it worked, at
  least, in a simple case.
  
  diff --git a/src/mesa/drivers/dri/i965/brw_eu.h
  b/src/mesa/drivers/dri/i965/brw_eu.h
  index e6c26e3..5908ba5 100644
  --- a/src/mesa/drivers/dri/i965/brw_eu.h
  +++ b/src/mesa/drivers/dri/i965/brw_eu.h
  @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 unsigned msg_length,
 unsigned response_length,
 bool eot,
  +  bool last_render_target,
 bool header_present);
  
   void brw_SAMPLE(struct brw_compile *p,
  diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c
  b/src/mesa/drivers/dri/i965/brw_eu_emit.c
  index 39f94e9..ffdbe6d 100644
  --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
  +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
  @@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p,
 unsigned msg_length,
 unsigned response_length,
 bool eot,
  +  bool last_render_target,
 bool header_present)
   {
  struct brw_context *brw = p-brw;
  @@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p,
  msg_type,
  msg_length,
  header_present,
  -   eot, /* last render target write
  */
  +   last_render_target,
  response_length,
  eot,
  0 /* send_commit_msg */);
  diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  index 1bc10f5..a4b84aa 100644
  --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
  @@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst
  *inst,
  
  if (inst-opcode == FS_OPCODE_REP_FB_WRITE)
 msg_control =
  BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
  -   else if (prog_data-dual_src_blend)
  -  msg_control =
  

[Mesa-dev] [PATCH 2/3] radeon/winsys: keep track of the last CS a BO was used in

2014-09-18 Thread Christian König
From: Christian König christian.koe...@amd.com

Signed-off-by: Christian König christian.koe...@amd.com
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  3 +++
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +--
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  2 +-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index 1c00a13..393c53c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -65,6 +65,9 @@ struct radeon_bo {
 /* how many command streams, which are being emitted in a separate
  * thread, is this bo referenced in? */
 int num_active_ioctls;
+
+/* the ID of the last command submission this buffer was used with */
+uint64_t last_cs_id;
 };
 
 struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 0aa54c2..e821b6f 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -107,7 +107,7 @@ static boolean radeon_init_cs_context(struct 
radeon_cs_context *csc,
 csc-chunks[1].length_dw = 0;
 csc-chunks[1].chunk_data = (uint64_t)(uintptr_t)csc-relocs;
 csc-chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
-csc-chunks[2].length_dw = 2;
+csc-chunks[2].length_dw = 5;
 csc-chunks[2].chunk_data = (uint64_t)(uintptr_t)csc-flags;
 
 csc-chunk_array[0] = (uint64_t)(uintptr_t)csc-chunks[0];
@@ -382,6 +382,7 @@ static boolean radeon_drm_cs_memory_below_limit(struct 
radeon_winsys_cs *rcs, ui
 
 void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct 
radeon_cs_context *csc)
 {
+uint64_t id;
 unsigned i;
 
 if (drmCommandWriteRead(csc-fd, DRM_RADEON_CS,
@@ -403,8 +404,11 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs 
*cs, struct radeon_cs
 radeon_dump_cs_on_lockup(cs, csc);
 }
 
-for (i = 0; i  csc-crelocs; i++)
+id = *((uint64_t *)csc-flags[3]);
+for (i = 0; i  csc-crelocs; i++) {
 p_atomic_dec(csc-relocs_bo[i]-num_active_ioctls);
+csc-relocs_bo[i]-last_cs_id = id;
+}
 
 radeon_cs_context_cleanup(csc);
 }
@@ -533,6 +537,9 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs 
*rcs,
 }
 break;
 }
+cs-cst-flags[2] = 0;
+cs-cst-flags[3] = 0;
+cs-cst-flags[4] = 0;
 
 if (cs-ws-thread) {
 pipe_semaphore_wait(cs-flush_completed);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 089494e..1d0bc64 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -37,7 +37,7 @@ struct radeon_cs_context {
 struct drm_radeon_cscs;
 struct drm_radeon_cs_chunk  chunks[3];
 uint64_tchunk_array[3];
-uint32_tflags[2];
+uint32_tflags[5];
 
 uint32_tcs_trace_id;
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] radeon/winsys: always send the INFO chunk

2014-09-18 Thread Christian König
From: Christian König christian.koe...@amd.com

Old kernels that don't know the chunk should simply ignore it.

Signed-off-by: Christian König christian.koe...@amd.com
---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index ecf8957..0aa54c2 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -115,6 +115,7 @@ static boolean radeon_init_cs_context(struct 
radeon_cs_context *csc,
 csc-chunk_array[2] = (uint64_t)(uintptr_t)csc-chunks[2];
 
 csc-cs.chunks = (uint64_t)(uintptr_t)csc-chunk_array;
+csc-cs.num_chunks = 3;
 
 for (i = 0; i  Elements(csc-reloc_indices_hashlist); i++) {
 csc-reloc_indices_hashlist[i] = -1;
@@ -498,48 +499,37 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs 
*rcs,
 p_atomic_inc(cs-cst-relocs_bo[i]-num_active_ioctls);
 }
 
+cs-cst-flags[0] = 0;
 switch (cs-base.ring_type) {
 case RING_DMA:
-cs-cst-flags[0] = 0;
 cs-cst-flags[1] = RADEON_CS_RING_DMA;
-cs-cst-cs.num_chunks = 3;
 if (cs-ws-info.r600_virtual_address) {
 cs-cst-flags[0] |= RADEON_CS_USE_VM;
 }
 break;
 
 case RING_UVD:
-cs-cst-flags[0] = 0;
 cs-cst-flags[1] = RADEON_CS_RING_UVD;
-cs-cst-cs.num_chunks = 3;
 break;
 
 case RING_VCE:
-cs-cst-flags[0] = 0;
 cs-cst-flags[1] = RADEON_CS_RING_VCE;
-cs-cst-cs.num_chunks = 3;
 break;
 
 default:
 case RING_GFX:
-cs-cst-flags[0] = 0;
 cs-cst-flags[1] = RADEON_CS_RING_GFX;
-cs-cst-cs.num_chunks = 2;
 if (flags  RADEON_FLUSH_KEEP_TILING_FLAGS) {
 cs-cst-flags[0] |= RADEON_CS_KEEP_TILING_FLAGS;
-cs-cst-cs.num_chunks = 3;
 }
 if (cs-ws-info.r600_virtual_address) {
 cs-cst-flags[0] |= RADEON_CS_USE_VM;
-cs-cst-cs.num_chunks = 3;
 }
 if (flags  RADEON_FLUSH_END_OF_FRAME) {
 cs-cst-flags[0] |= RADEON_CS_END_OF_FRAME;
-cs-cst-cs.num_chunks = 3;
 }
 if (flags  RADEON_FLUSH_COMPUTE) {
 cs-cst-flags[1] = RADEON_CS_RING_COMPUTE;
-cs-cst-cs.num_chunks = 3;
 }
 break;
 }
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeon/winsys: explicitly sync BOs

2014-09-18 Thread Christian König
From: Christian König christian.koe...@amd.com

For now this syncs all engines accessing a BO using the
new kernel interface; older kernels should ignore
the new chunk and maintain the old behavior.

Signed-off-by: Christian König christian.koe...@amd.com
---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 30 ---
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  5 +++--
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index e821b6f..587719b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -72,6 +72,9 @@
 #include stdint.h
 #include xf86drm.h
 
+#ifndef RADEON_CHUNK_ID_WAIT_FOR
+#define RADEON_CHUNK_ID_WAIT_FOR 0x05
+#endif
 
 #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
 
@@ -100,6 +103,13 @@ static boolean radeon_init_cs_context(struct 
radeon_cs_context *csc,
 return FALSE;
 }
 
+csc-cs_ids = CALLOC(csc-nrelocs, sizeof(uint64_t));
+if (!csc-cs_ids) {
+FREE(csc-relocs_bo);
+FREE(csc-relocs);
+return FALSE;
+}
+
 csc-chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
 csc-chunks[0].length_dw = 0;
 csc-chunks[0].chunk_data = (uint64_t)(uintptr_t)csc-buf;
@@ -109,13 +119,17 @@ static boolean radeon_init_cs_context(struct 
radeon_cs_context *csc,
 csc-chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
 csc-chunks[2].length_dw = 5;
 csc-chunks[2].chunk_data = (uint64_t)(uintptr_t)csc-flags;
+csc-chunks[3].chunk_id = RADEON_CHUNK_ID_WAIT_FOR;
+csc-chunks[3].length_dw = 0;
+csc-chunks[3].chunk_data = (uint64_t)(uintptr_t)csc-cs_ids;
 
 csc-chunk_array[0] = (uint64_t)(uintptr_t)csc-chunks[0];
 csc-chunk_array[1] = (uint64_t)(uintptr_t)csc-chunks[1];
 csc-chunk_array[2] = (uint64_t)(uintptr_t)csc-chunks[2];
+csc-chunk_array[3] = (uint64_t)(uintptr_t)csc-chunks[3];
 
 csc-cs.chunks = (uint64_t)(uintptr_t)csc-chunk_array;
-csc-cs.num_chunks = 3;
+csc-cs.num_chunks = 4;
 
 for (i = 0; i  Elements(csc-reloc_indices_hashlist); i++) {
 csc-reloc_indices_hashlist[i] = -1;
@@ -285,8 +299,11 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs,
 
 size = csc-nrelocs * sizeof(struct drm_radeon_cs_reloc);
 csc-relocs = realloc(csc-relocs, size);
-
 csc-chunks[1].chunk_data = (uint64_t)(uintptr_t)csc-relocs;
+
+size = csc-nrelocs * sizeof(uint64_t);
+csc-cs_ids = realloc(csc-cs_ids, size);
+csc-chunks[3].chunk_data = (uint64_t)(uintptr_t)csc-cs_ids;
 }
 
 /* Initialize the new relocation. */
@@ -383,7 +400,14 @@ static boolean radeon_drm_cs_memory_below_limit(struct 
radeon_winsys_cs *rcs, ui
 void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct 
radeon_cs_context *csc)
 {
 uint64_t id;
-unsigned i;
+unsigned i, c;
+
+for (i = 0, c = 0; i  csc-crelocs; i++) {
+id = csc-relocs_bo[i]-last_cs_id;
+if (id)
+csc-cs_ids[c++] = id;
+}
+csc-chunks[3].length_dw = c * 2;
 
 if (drmCommandWriteRead(csc-fd, DRM_RADEON_CS,
 csc-cs, sizeof(struct drm_radeon_cs))) {
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 1d0bc64..f903b5d 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -35,8 +35,8 @@ struct radeon_cs_context {
 
 int fd;
 struct drm_radeon_cscs;
-struct drm_radeon_cs_chunk  chunks[3];
-uint64_tchunk_array[3];
+struct drm_radeon_cs_chunk  chunks[4];
+uint64_tchunk_array[4];
 uint32_tflags[5];
 
 uint32_tcs_trace_id;
@@ -47,6 +47,7 @@ struct radeon_cs_context {
 unsignedvalidated_crelocs;
 struct radeon_bo**relocs_bo;
 struct drm_radeon_cs_reloc  *relocs;
+uint64_t*cs_ids;
 
 int reloc_indices_hashlist[512];
 
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] target-helpers: add inline qualifier on configuration_query()

2014-09-18 Thread Brian Paul
To silence unused function warnings.
---
 src/gallium/auxiliary/target-helpers/inline_drm_helper.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h 
b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
index dd55a71..9ca7a4a 100644
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -408,7 +408,7 @@ static const struct drm_conf_ret share_fd_ret = {
{true},
 };
 
-static const struct drm_conf_ret *
+static inline const struct drm_conf_ret *
 configuration_query(enum drm_conf conf)
 {
switch (conf) {
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] st/xa: silence unused variable warning

2014-09-18 Thread Brian Paul
---
 src/gallium/state_trackers/xa/xa_tracker.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/state_trackers/xa/xa_tracker.c 
b/src/gallium/state_trackers/xa/xa_tracker.c
index 268d56b..f69ac8e 100644
--- a/src/gallium/state_trackers/xa/xa_tracker.c
+++ b/src/gallium/state_trackers/xa/xa_tracker.c
@@ -148,6 +148,7 @@ xa_tracker_create(int drm_fd)
 
 #if GALLIUM_STATIC_TARGETS
 xa-screen = dd_create_screen(drm_fd);
+(void) loader_fd; /* silence unused var warning */
 #else
 loader_fd = dup(drm_fd);
 if (loader_fd == -1)
-- 
1.7.10.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/11] glsl: Check realloc return value in ir_function::matching_signature()

2014-09-18 Thread Anuj Phogat
On Thu, Sep 18, 2014 at 3:26 AM, Juha-Pekka Heikkila
juhapekka.heikk...@gmail.com wrote:
 On 10.09.2014 00:59, Anuj Phogat wrote:
 On Mon, Sep 8, 2014 at 11:53 PM, Juha-Pekka Heikkila
 juhapekka.heikk...@gmail.com wrote:
 Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
 ---
  src/glsl/ir_function.cpp | 11 +--
  1 file changed, 9 insertions(+), 2 deletions(-)

 diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
 index 98bec45..2b2643c 100644
 --- a/src/glsl/ir_function.cpp
 +++ b/src/glsl/ir_function.cpp
 @@ -24,6 +24,7 @@
  #include glsl_types.h
  #include ir.h
  #include glsl_parser_extras.h
 +#include main/errors.h

  typedef enum {
 PARAMETER_LIST_NO_MATCH,
 @@ -296,6 +297,7 @@ ir_function::matching_signature(_mesa_glsl_parse_state 
 *state,
  bool *is_exact)
  {
 ir_function_signature **inexact_matches = NULL;
 +   ir_function_signature **inexact_matches_temp;
 ir_function_signature *match = NULL;
 int num_inexact_matches = 0;

 @@ -321,11 +323,16 @@ 
 ir_function::matching_signature(_mesa_glsl_parse_state *state,
   free(inexact_matches);
   return sig;
case PARAMETER_LIST_INEXACT_MATCH:
 - inexact_matches = (ir_function_signature **)
 + inexact_matches_temp = (ir_function_signature **)
 realloc(inexact_matches,
 sizeof(*inexact_matches) *
 (num_inexact_matches + 1));
 - assert(inexact_matches);
 + if (inexact_matches_temp == NULL) {
 +_mesa_error_no_memory(__func__);
 +free(inexact_matches);
 This free is not required. inexact_matches is null.

 Why is inexact_matches null? This reallocation is inside
 foreach_in_list{..} and the number of inexact matches is counted with the
 num_inexact_matches variable. If realloc does not return null on the
 first run, inexact_matches would hold a valid pointer.

Right. Ignore my comment. I'm fine with this patch.
 +return NULL;
 + }
 + inexact_matches = inexact_matches_temp;
   inexact_matches[num_inexact_matches++] = sig;
   continue;
case PARAMETER_LIST_NO_MATCH:
 --
 1.8.5.1

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] radeon/winsys: keep track of the last CS a BO was used in

2014-09-18 Thread Marek Olšák
On Thu, Sep 18, 2014 at 5:34 PM, Christian König
deathsim...@vodafone.de wrote:
 From: Christian König christian.koe...@amd.com

 Signed-off-by: Christian König christian.koe...@amd.com
 ---
  src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  3 +++
  src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +--
  src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  2 +-
  3 files changed, 13 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h 
 b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
 index 1c00a13..393c53c 100644
 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
 +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
 @@ -65,6 +65,9 @@ struct radeon_bo {
  /* how many command streams, which are being emitted in a separate
   * thread, is this bo referenced in? */
  int num_active_ioctls;
 +
 +/* the ID of the last command submission this buffer was used with */
 +uint64_t last_cs_id;
  };

  struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws);
 diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c 
 b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
 index 0aa54c2..e821b6f 100644
 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
 +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
 @@ -107,7 +107,7 @@ static boolean radeon_init_cs_context(struct 
 radeon_cs_context *csc,
  csc-chunks[1].length_dw = 0;
  csc-chunks[1].chunk_data = (uint64_t)(uintptr_t)csc-relocs;
  csc-chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS;
 -csc-chunks[2].length_dw = 2;
 +csc-chunks[2].length_dw = 5;
  csc-chunks[2].chunk_data = (uint64_t)(uintptr_t)csc-flags;

  csc-chunk_array[0] = (uint64_t)(uintptr_t)csc-chunks[0];
 @@ -382,6 +382,7 @@ static boolean radeon_drm_cs_memory_below_limit(struct 
 radeon_winsys_cs *rcs, ui

  void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct 
 radeon_cs_context *csc)
  {
 +uint64_t id;
  unsigned i;

  if (drmCommandWriteRead(csc-fd, DRM_RADEON_CS,
 @@ -403,8 +404,11 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct 
 radeon_drm_cs *cs, struct radeon_cs
  radeon_dump_cs_on_lockup(cs, csc);
  }

 -for (i = 0; i  csc-crelocs; i++)
 +id = *((uint64_t *)csc-flags[3]);

Please add a comment here that the ID is returned by the CS ioctl.

Other than that, the series is:

Reviewed-by: Marek Olšák marek.ol...@amd.com

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 28/37] i965/gen6/gs: implement transform feedback support in gen6_gs_visitor

2014-09-18 Thread Jordan Justen
On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 From: Samuel Iglesias Gonsalvez sigles...@igalia.com

 This takes care of generating code required to handle transform feedback.
 Notice that transform feedback isn't enabled yet, since that requires
 additional setups in other parts of the code that will come in later patches.

 Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
 ---
  src/mesa/drivers/dri/i965/brw_context.h   | 113 ++
  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 309 
 +-
  src/mesa/drivers/dri/i965/gen6_gs_visitor.h   |  14 ++
  3 files changed, 391 insertions(+), 45 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
 b/src/mesa/drivers/dri/i965/brw_context.h
 index 7439da1..3418b76 100644
 --- a/src/mesa/drivers/dri/i965/brw_context.h
 +++ b/src/mesa/drivers/dri/i965/brw_context.h
 @@ -553,48 +553,6 @@ struct brw_vs_prog_data {
 bool uses_vertexid;
  };

 -
 -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
 - * this struct!
 - */
 -struct brw_gs_prog_data
 -{
 -   struct brw_vec4_prog_data base;
 -
 -   /**
 -* Size of an output vertex, measured in HWORDS (32 bytes).
 -*/
 -   unsigned output_vertex_size_hwords;
 -
 -   unsigned output_topology;
 -
 -   /**
 -* Size of the control data (cut bits or StreamID bits), in hwords (32
 -* bytes).  0 if there is no control data.
 -*/
 -   unsigned control_data_header_size_hwords;
 -
 -   /**
 -* Format of the control data (either 
 GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
 -* if the control data is StreamID bits, or
 -* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
 -* Ignored if control_data_header_size is 0.
 -*/
 -   unsigned control_data_format;
 -
 -   bool include_primitive_id;
 -
 -   int invocations;
 -
 -   /**
 -* Dispatch mode, can be any of:
 -* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
 -* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
 -* GEN7_GS_DISPATCH_MODE_SINGLE
 -*/
 -   int dispatch_mode;
 -};
 -
  /** Number of texture sampler units */
  #define BRW_MAX_TEX_UNIT 32

 @@ -641,6 +599,77 @@ struct brw_gs_prog_data
  #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
  #define BRW_MAX_GEN6_GS_SURFACES   
 SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS)

 +/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
 + * this struct!
 + */
 +struct brw_gs_prog_data
 +{
 +   struct brw_vec4_prog_data base;
 +
 +   /**
 +* Size of an output vertex, measured in HWORDS (32 bytes).
 +*/
 +   unsigned output_vertex_size_hwords;
 +
 +   unsigned output_topology;
 +
 +   /**
 +* Size of the control data (cut bits or StreamID bits), in hwords (32
 +* bytes).  0 if there is no control data.
 +*/
 +   unsigned control_data_header_size_hwords;
 +
 +   /**
 +* Format of the control data (either 
 GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
 +* if the control data is StreamID bits, or
 +* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
 +* Ignored if control_data_header_size is 0.
 +*/
 +   unsigned control_data_format;
 +
 +   bool include_primitive_id;
 +
 +   int invocations;
 +
 +   /**
 +* Dispatch mode, can be any of:
 +* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT
 +* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE
 +* GEN7_GS_DISPATCH_MODE_SINGLE
 +*/
 +   int dispatch_mode;
 +
 +   /**
 +* Gen6 transform feedback enabled flag.
 +*/
 +   bool gen6_xfb_enabled;
 +
 +   /**
 +* Gen6: Provoking vertex convention for odd-numbered triangles
 +* in tristrips.
 +*/
 +   GLuint pv_first:1;
 +
 +   /**
 +* Gen6: Number of varyings that are output to transform feedback.
 +*/
 +   GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */
 +
 +   /**
 +* Gen6: Map from the index of a transform feedback binding table entry 
 to the
 +* gl_varying_slot that should be streamed out through that binding table
 +* entry.
 +*/
 +   unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS];
 +
 +   /**
 +* Gen6: Map from the index of a transform feedback binding table entry 
 to the
 +* swizzles that should be used when streaming out data through that
 +* binding table entry.
 +*/
 +   unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS];
 +};
 +
  /**
   * Stride in bytes between shader_time entries.
   *
 diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 index c1cfe75..b8eaa58 100644
 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 @@ -97,6 +97,45 @@ gen6_gs_visitor::emit_prolog()
 this-prim_count = src_reg(this, glsl_type::uint_type);
 emit(MOV(dst_reg(this-prim_count), 0u));

 +   if (c-prog_data.gen6_xfb_enabled) {
 +  const struct gl_transform_feedback_info *linked_xfb_info =
 

Re: [Mesa-dev] [PATCH 29/37] i965/gen6/gs: Setup SOL surfaces for user-provided geometry shaders

2014-09-18 Thread Jordan Justen
On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 From: Samuel Iglesias Gonsalvez sigles...@igalia.com

 Update gen6_gs_binding_table and gen6_sol_surface to use user-provided
 geometry program information when present. This is necessary to implement
 transform feedback support.

 Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
 ---
  src/mesa/drivers/dri/i965/brw_context.h |   2 +-
  src/mesa/drivers/dri/i965/gen6_sol.c| 119 
 ++--
  2 files changed, 82 insertions(+), 39 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
 b/src/mesa/drivers/dri/i965/brw_context.h
 index 3418b76..82f32af 100644
 --- a/src/mesa/drivers/dri/i965/brw_context.h
 +++ b/src/mesa/drivers/dri/i965/brw_context.h
 @@ -914,7 +914,7 @@ struct brw_stage_state
 uint32_t push_const_offset; /* Offset in the batchbuffer */
 int push_const_size; /* in 256-bit register increments */

 -   /* Binding table: pointers to SURFACE_STATE entries. */
 +   /** Binding table: pointers to SURFACE_STATE entries. */

Shouldn't be part of this patch. With it removed:
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

 uint32_t bind_bo_offset;
 uint32_t surf_offset[BRW_MAX_SURFACES];

 diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
 b/src/mesa/drivers/dri/i965/gen6_sol.c
 index e1c1b3c..d21a010 100644
 --- a/src/mesa/drivers/dri/i965/gen6_sol.c
 +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
 @@ -41,13 +41,21 @@ gen6_update_sol_surfaces(struct brw_context *brw)
 /* BRW_NEW_TRANSFORM_FEEDBACK */
 struct gl_transform_feedback_object *xfb_obj =
ctx-TransformFeedback.CurrentObject;
 -   /* BRW_NEW_VERTEX_PROGRAM */
 -   const struct gl_shader_program *shaderprog =
 -  ctx-_Shader-CurrentProgram[MESA_SHADER_VERTEX];
 -   const struct gl_transform_feedback_info *linked_xfb_info =
 -  shaderprog-LinkedTransformFeedback;
 +   const struct gl_shader_program *shaderprog;
 +   const struct gl_transform_feedback_info *linked_xfb_info;
 int i;

 +   if (brw-geometry_program) {
 +  /* BRW_NEW_GEOMETRY_PROGRAM */
 +  shaderprog =
 + ctx-_Shader-CurrentProgram[MESA_SHADER_GEOMETRY];
 +   } else {
 +  /* BRW_NEW_VERTEX_PROGRAM */
 +  shaderprog =
 + ctx-_Shader-CurrentProgram[MESA_SHADER_VERTEX];
 +   }
 +   linked_xfb_info = shaderprog-LinkedTransformFeedback;
 +
 for (i = 0; i  BRW_MAX_SOL_BINDINGS; ++i) {
const int surf_index = SURF_INDEX_GEN6_SOL_BINDING(i);
if (_mesa_is_xfb_active_and_unpaused(ctx) 
 @@ -56,12 +64,24 @@ gen6_update_sol_surfaces(struct brw_context *brw)
   unsigned buffer_offset =
  xfb_obj-Offset[buffer] / 4 +
  linked_xfb_info-Outputs[i].DstOffset;
 - brw_update_sol_surface(
 -brw, xfb_obj-Buffers[buffer], 
 brw-ff_gs.surf_offset[surf_index],
 -linked_xfb_info-Outputs[i].NumComponents,
 -linked_xfb_info-BufferStride[buffer], buffer_offset);
 + if (brw-geometry_program) {
 +brw_update_sol_surface(
 +   brw, xfb_obj-Buffers[buffer],
 +   brw-gs.base.surf_offset[surf_index],
 +   linked_xfb_info-Outputs[i].NumComponents,
 +   linked_xfb_info-BufferStride[buffer], buffer_offset);
 + } else {
 +brw_update_sol_surface(
 +   brw, xfb_obj-Buffers[buffer],
 +   brw-ff_gs.surf_offset[surf_index],
 +   linked_xfb_info-Outputs[i].NumComponents,
 +   linked_xfb_info-BufferStride[buffer], buffer_offset);
 + }
} else {
 - brw-ff_gs.surf_offset[surf_index] = 0;
 + if (!brw-geometry_program)
 +brw-ff_gs.surf_offset[surf_index] = 0;
 + else
 +brw-gs.base.surf_offset[surf_index] = 0;
}
 }

 @@ -73,6 +93,7 @@ const struct brw_tracked_state gen6_sol_surface = {
.mesa = 0,
.brw = (BRW_NEW_BATCH |
BRW_NEW_VERTEX_PROGRAM |
 +  BRW_NEW_GEOMETRY_PROGRAM |
BRW_NEW_TRANSFORM_FEEDBACK),
.cache = 0
 },
 @@ -86,38 +107,50 @@ const struct brw_tracked_state gen6_sol_surface = {
  static void
  brw_gs_upload_binding_table(struct brw_context *brw)
  {
 -   struct gl_context *ctx = brw-ctx;
 -   /* BRW_NEW_VERTEX_PROGRAM */
 -   const struct gl_shader_program *shaderprog =
 -  ctx-_Shader-CurrentProgram[MESA_SHADER_VERTEX];
 -   bool has_surfaces = false;
 uint32_t *bind;

 -   if (shaderprog) {
 -  const struct gl_transform_feedback_info *linked_xfb_info =
 -shaderprog-LinkedTransformFeedback;
 -  /* Currently we only ever upload surfaces for SOL. */
 -  has_surfaces = linked_xfb_info-NumOutputs != 0;
 -   }
 +   if (!brw-geometry_program) {
 +  struct gl_context *ctx = brw-ctx;
 +  /* BRW_NEW_VERTEX_PROGRAM */
 +  const struct gl_shader_program *shaderprog =
 + 

[Mesa-dev] [PATCH 3/5] mesa: Set correct array element in vbo_exec_vtx_init.

2014-09-18 Thread Kenneth Graunke
I'm not familiar with this code, but this sure appears to be a typo.
It looks like the intent is to set each array element, not arrays[0]
each time.  Notably, the loop just below uses array, not arrays.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/vbo/vbo_exec_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c
index 74aec12..2871100 100644
--- a/src/mesa/vbo/vbo_exec_api.c
+++ b/src/mesa/vbo/vbo_exec_api.c
@@ -1067,7 +1067,7 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec )
  struct gl_client_array *array;
  array = arrays[VERT_ATTRIB_FF(i)];
  array-BufferObj = NULL;
- _mesa_reference_buffer_object(ctx, arrays-BufferObj,
+ _mesa_reference_buffer_object(ctx, array-BufferObj,
  vbo-currval[VBO_ATTRIB_POS+i].BufferObj);
   }
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] mesa: Use VertexArray, not _VertexArray, in array size expressions.

2014-09-18 Thread Kenneth Graunke
Both sizes are VERT_ATTRIB_MAX, so this has no effect.  But it drops a
few trivial uses of the derived state.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/main/arrayobj.c | 2 +-
 src/mesa/main/attrib.c   | 2 +-
 src/mesa/main/varray.c   | 8 
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c
index 0d77b11..6440ea6 100644
--- a/src/mesa/main/arrayobj.c
+++ b/src/mesa/main/arrayobj.c
@@ -230,7 +230,7 @@ _mesa_initialize_vao(struct gl_context *ctx,
obj-RefCount = 1;
 
/* Init the individual arrays */
-   for (i = 0; i  Elements(obj-_VertexAttrib); i++) {
+   for (i = 0; i  Elements(obj-VertexAttrib); i++) {
   switch (i) {
   case VERT_ATTRIB_WEIGHT:
  init_array(ctx, obj, VERT_ATTRIB_WEIGHT, 1, GL_FLOAT);
diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index ef98ba7..d90e662 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1449,7 +1449,7 @@ copy_array_object(struct gl_context *ctx,
/* In theory must be the same anyway, but on recreate make sure it matches 
*/
dest-ARBsemantics = src-ARBsemantics;
 
-   for (i = 0; i  Elements(src-_VertexAttrib); i++) {
+   for (i = 0; i  Elements(src-VertexAttrib); i++) {
   _mesa_copy_client_array(ctx, dest-_VertexAttrib[i], 
src-_VertexAttrib[i]);
   _mesa_copy_vertex_attrib_array(ctx, dest-VertexAttrib[i], 
src-VertexAttrib[i]);
   _mesa_copy_vertex_buffer_binding(ctx, dest-VertexBinding[i], 
src-VertexBinding[i]);
diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index ead7864..09bf52c 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -711,7 +711,7 @@ _mesa_EnableVertexAttribArray(GLuint index)
 
vao = ctx-Array.VAO;
 
-   ASSERT(VERT_ATTRIB_GENERIC(index)  Elements(vao-_VertexAttrib));
+   ASSERT(VERT_ATTRIB_GENERIC(index)  Elements(vao-VertexAttrib));
 
if (!vao-VertexAttrib[VERT_ATTRIB_GENERIC(index)].Enabled) {
   /* was disabled, now being enabled */
@@ -737,7 +737,7 @@ _mesa_DisableVertexAttribArray(GLuint index)
 
vao = ctx-Array.VAO;
 
-   ASSERT(VERT_ATTRIB_GENERIC(index)  Elements(vao-_VertexAttrib));
+   ASSERT(VERT_ATTRIB_GENERIC(index)  Elements(vao-VertexAttrib));
 
if (vao-VertexAttrib[VERT_ATTRIB_GENERIC(index)].Enabled) {
   /* was enabled, now being disabled */
@@ -831,7 +831,7 @@ get_current_attrib(struct gl_context *ctx, GLuint index, 
const char *function)
   return NULL;
}
 
-   ASSERT(VERT_ATTRIB_GENERIC(index)  
Elements(ctx-Array.VAO-_VertexAttrib));
+   ASSERT(VERT_ATTRIB_GENERIC(index)  Elements(ctx-Array.VAO-VertexAttrib));
 
FLUSH_CURRENT(ctx, 0);
return ctx-Current.Attrib[VERT_ATTRIB_GENERIC(index)];
@@ -953,7 +953,7 @@ _mesa_GetVertexAttribPointerv(GLuint index, GLenum pname, 
GLvoid **pointer)
   return;
}
 
-   ASSERT(VERT_ATTRIB_GENERIC(index)  
Elements(ctx-Array.VAO-_VertexAttrib));
+   ASSERT(VERT_ATTRIB_GENERIC(index)  Elements(ctx-Array.VAO-VertexAttrib));
 
*pointer = (GLvoid *) 
ctx-Array.VAO-VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr;
 }
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] mesa: Replace gl_client_array usage in _mesa_print_arrays()

2014-09-18 Thread Kenneth Graunke
For now, this prints out the same information as before - just using the
newer/non-derived structures.  Printing out each structure's fields
separately might be more useful, but I've never used this code, so I'm
not sure.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/main/varray.c | 47 +++
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c
index 09bf52c..380a32e 100644
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -1904,16 +1904,19 @@ _mesa_copy_vertex_buffer_binding(struct gl_context *ctx,
  * Print vertex array's fields.
  */
 static void
-print_array(const char *name, GLint index, const struct gl_client_array *array)
+print_array(const char *name, GLint index,
+const struct gl_vertex_attrib_array *attrib,
+const struct gl_vertex_buffer_binding *binding)
 {
if (index = 0)
   printf(  %s[%d]: , name, index);
else
   printf(  %s: , name);
printf(Ptr=%p, Type=0x%x, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size 
%lu)\n,
- array-Ptr, array-Type, array-Size,
- array-_ElementSize, array-StrideB,
- array-BufferObj-Name, (unsigned long) array-BufferObj-Size);
+ _mesa_vertex_attrib_address(attrib, binding),
+  attrib-Type, attrib-Size,
+ attrib-_ElementSize, binding-Stride,
+ binding-BufferObj-Name, (unsigned long) binding-BufferObj-Size);
 }
 
 
@@ -1927,18 +1930,30 @@ _mesa_print_arrays(struct gl_context *ctx)
GLuint i;
 
printf(Array Object %u\n, vao-Name);
-   if (vao-_VertexAttrib[VERT_ATTRIB_POS].Enabled)
-  print_array(Vertex, -1, vao-_VertexAttrib[VERT_ATTRIB_POS]);
-   if (vao-_VertexAttrib[VERT_ATTRIB_NORMAL].Enabled)
-  print_array(Normal, -1, vao-_VertexAttrib[VERT_ATTRIB_NORMAL]);
-   if (vao-_VertexAttrib[VERT_ATTRIB_COLOR0].Enabled)
-  print_array(Color, -1, vao-_VertexAttrib[VERT_ATTRIB_COLOR0]);
-   for (i = 0; i  ctx-Const.MaxTextureCoordUnits; i++)
-  if (vao-_VertexAttrib[VERT_ATTRIB_TEX(i)].Enabled)
- print_array(TexCoord, i, vao-_VertexAttrib[VERT_ATTRIB_TEX(i)]);
-   for (i = 0; i  VERT_ATTRIB_GENERIC_MAX; i++)
-  if (vao-_VertexAttrib[VERT_ATTRIB_GENERIC(i)].Enabled)
- print_array(Attrib, i, vao-_VertexAttrib[VERT_ATTRIB_GENERIC(i)]);
+   if (vao-VertexAttrib[VERT_ATTRIB_POS].Enabled) {
+  print_array(Vertex, -1, vao-VertexAttrib[VERT_ATTRIB_POS],
+vao-VertexBinding[VERT_ATTRIB_POS]);
+   }
+   if (vao-VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) {
+  print_array(Normal, -1, vao-VertexAttrib[VERT_ATTRIB_NORMAL],
+vao-VertexBinding[VERT_ATTRIB_NORMAL]);
+   }
+   if (vao-VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) {
+  print_array(Color, -1, vao-VertexAttrib[VERT_ATTRIB_COLOR0],
+   vao-VertexBinding[VERT_ATTRIB_COLOR0]);
+   }
+   for (i = 0; i  ctx-Const.MaxTextureCoordUnits; i++) {
+  if (vao-VertexAttrib[VERT_ATTRIB_TEX(i)].Enabled) {
+ print_array(TexCoord, i, vao-VertexAttrib[VERT_ATTRIB_TEX(i)],
+vao-VertexBinding[VERT_ATTRIB_TEX(i)]);
+  }
+   }
+   for (i = 0; i  VERT_ATTRIB_GENERIC_MAX; i++) {
+  if (vao-VertexAttrib[VERT_ATTRIB_GENERIC(i)].Enabled) {
+ print_array(Attrib, i, vao-VertexAttrib[VERT_ATTRIB_GENERIC(i)],
+  vao-VertexBinding[VERT_ATTRIB_GENERIC(i)]);
+  }
+   }
 }
 
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] mesa: Use proper structure for glGet*(GL_TEXTURE_COORD_ARRAY*).

2014-09-18 Thread Kenneth Graunke
The code in get.c that handles this uses ctx->Array.VAO->VertexAttrib,
which is a gl_vertex_attrib_array structure, not a gl_client_array.

The offsets of all fields happened to be the same in both structures, at
least on x86_64.  Size, Type, and Stride are obviously the same:
both structures start with the same fields, in the same order.

Enabled is dicier: there are different fields before it in both
structures, including pointer sized values which might need special
alignment.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/main/get_hash_params.py | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index aace8a5..da35684 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -203,10 +203,10 @@ descriptor=[
   [ COLOR_ARRAY_SIZE, LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA ],
   [ COLOR_ARRAY_TYPE, ARRAY_ENUM(VertexAttrib[VERT_ATTRIB_COLOR0].Type), 
NO_EXTRA ],
   [ COLOR_ARRAY_STRIDE, ARRAY_INT(VertexAttrib[VERT_ATTRIB_COLOR0].Stride), 
NO_EXTRA ],
-  [ TEXTURE_COORD_ARRAY, LOC_CUSTOM, TYPE_BOOLEAN, offsetof(struct 
gl_client_array, Enabled), NO_EXTRA ],
-  [ TEXTURE_COORD_ARRAY_SIZE, LOC_CUSTOM, TYPE_INT, offsetof(struct 
gl_client_array, Size), NO_EXTRA ],
-  [ TEXTURE_COORD_ARRAY_TYPE, LOC_CUSTOM, TYPE_ENUM, offsetof(struct 
gl_client_array, Type), NO_EXTRA ],
-  [ TEXTURE_COORD_ARRAY_STRIDE, LOC_CUSTOM, TYPE_INT, offsetof(struct 
gl_client_array, Stride), NO_EXTRA ],
+  [ TEXTURE_COORD_ARRAY, LOC_CUSTOM, TYPE_BOOLEAN, offsetof(struct 
gl_vertex_attrib_array, Enabled), NO_EXTRA ],
+  [ TEXTURE_COORD_ARRAY_SIZE, LOC_CUSTOM, TYPE_INT, offsetof(struct 
gl_vertex_attrib_array, Size), NO_EXTRA ],
+  [ TEXTURE_COORD_ARRAY_TYPE, LOC_CUSTOM, TYPE_ENUM, offsetof(struct 
gl_vertex_attrib_array, Type), NO_EXTRA ],
+  [ TEXTURE_COORD_ARRAY_STRIDE, LOC_CUSTOM, TYPE_INT, offsetof(struct 
gl_vertex_attrib_array, Stride), NO_EXTRA ],
 
 # GL_ARB_multitexture
   [ MAX_TEXTURE_UNITS, CONTEXT_INT(Const.MaxTextureUnits), NO_EXTRA ],
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] mesa: Remove some dead helper functions.

2014-09-18 Thread Kenneth Graunke
Dead since the _MaxElement removal, but these functions seemed generally
applicable, so I decided to remove them in a separate patch.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/main/arrayobj.h | 26 --
 1 file changed, 26 deletions(-)

diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h
index 1819cd1..3c1f918 100644
--- a/src/mesa/main/arrayobj.h
+++ b/src/mesa/main/arrayobj.h
@@ -78,32 +78,6 @@ extern void
 _mesa_update_vao_client_arrays(struct gl_context *ctx,
struct gl_vertex_array_object *vao);
 
-
-/** Returns the bitmask of all enabled arrays in fixed function mode.
- *
- *  In fixed function mode only the traditional fixed function arrays
- *  are available.
- */
-static inline GLbitfield64
-_mesa_array_object_get_enabled_ff(const struct gl_vertex_array_object *vao)
-{
-   return vao-_Enabled  VERT_BIT_FF_ALL;
-}
-
-/** Returns the bitmask of all enabled arrays in arb/glsl shader mode.
- *
- *  In arb/glsl shader mode all the fixed function and the arb/glsl generic
- *  arrays are available. Only the first generic array takes
- *  precedence over the legacy position array.
- */
-static inline GLbitfield64
-_mesa_array_object_get_enabled_arb(const struct gl_vertex_array_object *vao)
-{
-   GLbitfield64 enabled = vao-_Enabled;
-   return enabled  ~(VERT_BIT_POS  (enabled  VERT_ATTRIB_GENERIC0));
-}
-
-
 /*
  * API functions
  */
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 30/37] i965/gen6/gs: Buffer PSIZ/flags vertex data in gen6_gs_visitor

2014-09-18 Thread Jordan Justen
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com

On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote:
 From: Samuel Iglesias Gonsalvez sigles...@igalia.com

 Since geometry shaders can alter the value of varyings packed in the first
 output VUE slot (PSIZ), we need to buffer it together with all the other
 vertex data so we can emit the right value for each vertex when we do the
 URB writes.

 This fixes the following piglit test in gen6:
 tests/spec/glsl-1.50/execution/redeclare-pervertex-out-subset-gs.shader_test

 Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
 ---
  src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 79 
 ++-
  1 file changed, 41 insertions(+), 38 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 index b8eaa58..fca7536 100644
 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp
 @@ -178,16 +178,33 @@ gen6_gs_visitor::visit(ir_emit_vertex *)

/* Buffer all output slots for this vertex in vertex_output */
for (int slot = 0; slot  prog_data-vue_map.num_slots; ++slot) {
 - /* We will handle PSIZ for each vertex at thread end time since it
 -  * is not computed by the GS algorithm and requires specific 
 handling.
 -  */
   int varying = prog_data-vue_map.slot_to_varying[slot];
   if (varying != VARYING_SLOT_PSIZ) {
  dst_reg dst(this-vertex_output);
  dst.reladdr = ralloc(mem_ctx, src_reg);
  memcpy(dst.reladdr, this-vertex_output_offset, 
 sizeof(src_reg));
  emit_urb_slot(dst, varying);
 + } else {
 +/* The PSIZ slot can pack multiple varyings in different channels
 + * and emit_urb_slot() will produce a MOV instruction for each of
 + * them. Since we are writing to an array, that will translate to
 + * possibly multiple MOV instructions with an array destination 
 and
 + * each will generate a scratch write with the same offset into
 + * scratch space (thus, each one overwriting the previous). This 
 is
 + * not what we want. What we will do instead is emit PSIZ to
 + * a regular temporary register, then move that register into 
 the
 + * array. This way we only have one instruction with an array
 + * destination and we only produce a single scratch write.
 + */
 +dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
 +emit_urb_slot(tmp, varying);
 +dst_reg dst(this-vertex_output);
 +dst.reladdr = ralloc(mem_ctx, src_reg);
 +memcpy(dst.reladdr, this-vertex_output_offset, 
 sizeof(src_reg));
 +vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
 +inst-force_writemask_all = true;
   }
 +
   emit(ADD(dst_reg(this-vertex_output_offset),
this-vertex_output_offset, 1u));
}
 @@ -427,17 +444,12 @@ gen6_gs_visitor::emit_thread_end()
 memcpy(data.reladdr, this-vertex_output_offset,
sizeof(src_reg));

 -   if (varying == VARYING_SLOT_PSIZ) {
 -  /* We did not buffer PSIZ, emit it directly here */
 -  emit_urb_slot(dst_reg(MRF, mrf), varying);
 -   } else {
 -  /* Copy this slot to the appropriate message register */
 -  dst_reg reg = dst_reg(MRF, mrf);
 -  reg.type = output_reg[varying].type;
 -  data.type = reg.type;
 -  vec4_instruction *inst = emit(MOV(reg, data));
 -  inst-force_writemask_all = true;
 -   }
 +   /* Copy this slot to the appropriate message register */
 +   dst_reg reg = dst_reg(MRF, mrf);
 +   reg.type = output_reg[varying].type;
 +   data.type = reg.type;
 +   vec4_instruction *inst = emit(MOV(reg, data));
 +   inst-force_writemask_all = true;

 mrf++;
 emit(ADD(dst_reg(this-vertex_output_offset),
 @@ -585,22 +597,19 @@ gen6_gs_visitor::xfb_buffer_output()
 /* Buffer all TF outputs for this vertex in xfb_output */
 for (int binding = 0; binding  
 prog_data-num_transform_feedback_bindings;
  binding++) {
 -  /* We will handle PSIZ for each vertex at thread end time since it
 -   * is not computed by the GS algorithm and requires specific handling.
 -   */
unsigned varying =
   prog_data-transform_feedback_bindings[binding];
 -  if (varying != VARYING_SLOT_PSIZ) {
 - dst_reg dst(this-xfb_output);
 - dst.reladdr = ralloc(mem_ctx, src_reg);
 - memcpy(dst.reladdr, this-xfb_output_offset, sizeof(src_reg));
 - dst.type = 

[Mesa-dev] [PATCH 0/2] nv50, nvc0: fix weirdo zs formats and their blits

2014-09-18 Thread Ilia Mirkin
There were reports of issues with gallium-nine. It's unclear whether mesa/st
uses these formats; the patches did not produce any piglit changes. However,
they seem right...

Ilia Mirkin (2):
  nv50,nvc0: add missing depth/stencil formats to tile flag selection
  nv50,nvc0: fix 3d blit logic for odd depth/stencil formats

 src/gallium/drivers/nouveau/nv50/nv50_blit.h| 21 ++---
 src/gallium/drivers/nouveau/nv50/nv50_miptree.c |  4 
 src/gallium/drivers/nouveau/nv50/nv50_surface.c |  4 
 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c |  4 
 4 files changed, 26 insertions(+), 7 deletions(-)

-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] nv50, nvc0: add missing depth/stencil formats to tile flag selection

2014-09-18 Thread Ilia Mirkin
Reported-by: David Heidelberger david.heidelber...@ixit.cz
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 4 
 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 4 
 2 files changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c 
b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index 14e5a0d..1aacaec 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -78,9 +78,12 @@ nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean 
compressed)
case PIPE_FORMAT_Z16_UNORM:
   tile_flags = 0x6c + ms;
   break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   tile_flags = 0x18 + ms;
   break;
+   case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   tile_flags = 0x128 + ms;
@@ -88,6 +91,7 @@ nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean 
compressed)
case PIPE_FORMAT_Z32_FLOAT:
   tile_flags = 0x40 + ms;
   break;
+   case PIPE_FORMAT_X32_S8X24_UINT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   tile_flags = 0x60 + ms;
   break;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 3baa752..1beda7d 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -53,12 +53,15 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, 
boolean compressed)
   else
  tile_flags = 0x01;
   break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   if (compressed)
  tile_flags = 0x51 + ms;
   else
  tile_flags = 0x46;
   break;
+   case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   if (compressed)
@@ -72,6 +75,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean 
compressed)
   else
  tile_flags = 0x7b;
   break;
+   case PIPE_FORMAT_X32_S8X24_UINT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
   if (compressed)
  tile_flags = 0xce + ms;
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] nv50, nvc0: fix 3d blit logic for odd depth/stencil formats

2014-09-18 Thread Ilia Mirkin
Reported-by: David Heidelberger david.heidelber...@ixit.cz
Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
---
 src/gallium/drivers/nouveau/nv50/nv50_blit.h| 21 ++---
 src/gallium/drivers/nouveau/nv50/nv50_surface.c |  4 
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h 
b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
index bdd6a63..756c4c1 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_blit.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h
@@ -111,10 +111,14 @@ nv50_blit_zeta_to_colour_format(enum pipe_format format)
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_X24S8_UINT:
+   case PIPE_FORMAT_S8X24_UINT:
   return PIPE_FORMAT_R8G8B8A8_UNORM;
case PIPE_FORMAT_Z32_FLOAT:
   return PIPE_FORMAT_R32_FLOAT;
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+   case PIPE_FORMAT_X32_S8X24_UINT:
   return PIPE_FORMAT_R32G32_FLOAT;
default:
   assert(0);
@@ -131,19 +135,21 @@ nv50_blit_derive_color_mask(const struct pipe_blit_info 
*info)
uint16_t color_mask = 0;
 
switch (info-dst.format) {
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   if (mask  PIPE_MASK_S)
  color_mask |= 0x1000;
-  /* fall through */
-   case PIPE_FORMAT_Z24X8_UNORM:
   if (mask  PIPE_MASK_Z)
  color_mask |= 0x0111;
   break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-  if (mask  PIPE_MASK_Z)
- color_mask |= 0x1110;
   if (mask  PIPE_MASK_S)
  color_mask |= 0x0001;
+  if (mask  PIPE_MASK_Z)
+ color_mask |= 0x1110;
   break;
default:
   if (mask  (PIPE_MASK_R | PIPE_MASK_Z)) color_mask |= 0x0001;
@@ -162,17 +168,18 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info 
*info)
uint32_t mask = 0;
 
switch (info-dst.format) {
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
   if (info-mask  PIPE_MASK_Z) mask |= 0x00ff;
   if (info-mask  PIPE_MASK_S) mask |= 0xff00;
   break;
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   if (info-mask  PIPE_MASK_Z) mask |= 0xff00;
   if (info-mask  PIPE_MASK_S) mask |= 0x00ff;
   break;
-   case PIPE_FORMAT_X8Z24_UNORM:
-  if (info-mask  PIPE_MASK_Z) mask = 0x00ff;
-  break;
default:
   mask = 0x;
   break;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c 
b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
index 8ec4a5f..e1dd6e0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -827,6 +827,7 @@ nv50_blit_select_mode(const struct pipe_blit_info *info)
switch (info-dst.resource-format) {
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_X24S8_UINT:
   switch (mask  PIPE_MASK_ZS) {
   case PIPE_MASK_ZS: return NV50_BLIT_MODE_Z24S8;
   case PIPE_MASK_Z:  return NV50_BLIT_MODE_Z24X8;
@@ -834,6 +835,8 @@ nv50_blit_select_mode(const struct pipe_blit_info *info)
  return NV50_BLIT_MODE_X24S8;
   }
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+   case PIPE_FORMAT_X8Z24_UNORM:
+   case PIPE_FORMAT_S8X24_UINT:
   switch (mask  PIPE_MASK_ZS) {
   case PIPE_MASK_ZS: return NV50_BLIT_MODE_S8Z24;
   case PIPE_MASK_Z:  return NV50_BLIT_MODE_X8Z24;
@@ -842,6 +845,7 @@ nv50_blit_select_mode(const struct pipe_blit_info *info)
   }
case PIPE_FORMAT_Z32_FLOAT:
case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+   case PIPE_FORMAT_X32_S8X24_UINT:
   switch (mask  PIPE_MASK_ZS) {
   case PIPE_MASK_ZS: return NV50_BLIT_MODE_ZS;
   case PIPE_MASK_Z:  return NV50_BLIT_MODE_PASS;
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev