Re: [Mesa-dev] [PATCH 1/6] radeonsi: remove redundant parameter in si_shader_binary_read

2015-07-16 Thread Tom Stellard
On Thu, Jul 16, 2015 at 06:55:26PM +0200, Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com
 

Reviewed-by: Tom Stellard thomas.stell...@amd.com

 ---
  src/gallium/drivers/radeonsi/si_compute.c | 2 +-
  src/gallium/drivers/radeonsi/si_shader.c  | 8 +++-
  src/gallium/drivers/radeonsi/si_shader.h  | 3 +--
  3 files changed, 5 insertions(+), 8 deletions(-)
 
 diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
 b/src/gallium/drivers/radeonsi/si_compute.c
 index 89bef2e..0361c99 100644
 --- a/src/gallium/drivers/radeonsi/si_compute.c
 +++ b/src/gallium/drivers/radeonsi/si_compute.c
 @@ -144,7 +144,7 @@ static void *si_create_compute_state(
* the shader code to the GPU.
*/
   init_scratch_buffer(sctx, program);
 - si_shader_binary_read(sctx-screen, program-shader, 
 program-shader.binary);
 + si_shader_binary_read(sctx-screen, program-shader);
  
  #endif
   program-input_buffer = si_resource_create_custom(sctx-b.b.screen,
 diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
 b/src/gallium/drivers/radeonsi/si_shader.c
 index 75a29ae..b988f6d 100644
 --- a/src/gallium/drivers/radeonsi/si_shader.c
 +++ b/src/gallium/drivers/radeonsi/si_shader.c
 @@ -2686,11 +2686,9 @@ void si_shader_apply_scratch_relocs(struct si_context 
 *sctx,
   }
  }
  
 -int si_shader_binary_read(struct si_screen *sscreen,
 - struct si_shader *shader,
 - const struct radeon_shader_binary *binary)
 +int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
 *shader)
  {
 -
 + const struct radeon_shader_binary *binary = shader-binary;
   unsigned i;
   unsigned code_size;
   unsigned char *ptr;
 @@ -2750,7 +2748,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
 si_shader *shader,
   if (r) {
   return r;
   }
 - r = si_shader_binary_read(sscreen, shader, shader-binary);
 + r = si_shader_binary_read(sscreen, shader);
  
   FREE(shader-binary.config);
   FREE(shader-binary.rodata);
 diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
 b/src/gallium/drivers/radeonsi/si_shader.h
 index 8d309b4..1e8b52b 100644
 --- a/src/gallium/drivers/radeonsi/si_shader.h
 +++ b/src/gallium/drivers/radeonsi/si_shader.h
 @@ -191,8 +191,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
 si_shader *shader,
   LLVMTargetMachineRef tm, LLVMModuleRef mod);
  void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned 
 index);
 -int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
 *shader,
 - const struct radeon_shader_binary *binary);
 +int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
 *shader);
  void si_shader_apply_scratch_relocs(struct si_context *sctx,
   struct si_shader *shader,
   uint64_t scratch_va);
 -- 
 2.1.0
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/6] radeonsi: upload shader rodata after updating scratch relocations

2015-07-16 Thread Tom Stellard
On Thu, Jul 16, 2015 at 06:55:27PM +0200, Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com
 

Reviewed-by: Tom Stellard thomas.stell...@amd.com
 Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org
 ---
  src/gallium/drivers/radeonsi/si_shader.c| 49 
 ++---
  src/gallium/drivers/radeonsi/si_shader.h|  1 +
  src/gallium/drivers/radeonsi/si_state_shaders.c |  8 +---
  3 files changed, 29 insertions(+), 29 deletions(-)
 
 diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
 b/src/gallium/drivers/radeonsi/si_shader.c
 index b988f6d..955e780 100644
 --- a/src/gallium/drivers/radeonsi/si_shader.c
 +++ b/src/gallium/drivers/radeonsi/si_shader.c
 @@ -2686,16 +2686,41 @@ void si_shader_apply_scratch_relocs(struct si_context 
 *sctx,
   }
  }
  
 +int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
 *shader)
 +{
 + const struct radeon_shader_binary *binary = shader-binary;
 + unsigned code_size = binary-code_size + binary-rodata_size;
 + unsigned char *ptr;
 +
 + r600_resource_reference(shader-bo, NULL);
 + shader-bo = si_resource_create_custom(sscreen-b.b,
 +PIPE_USAGE_IMMUTABLE,
 +code_size);
 + if (!shader-bo)
 + return -ENOMEM;
 +
 + ptr = sscreen-b.ws-buffer_map(shader-bo-cs_buf, NULL,
 + PIPE_TRANSFER_READ_WRITE);
 + util_memcpy_cpu_to_le32(ptr, binary-code, binary-code_size);
 + if (binary-rodata_size  0) {
 + ptr += binary-code_size;
 + util_memcpy_cpu_to_le32(ptr, binary-rodata,
 + binary-rodata_size);
 + }
 +
 + sscreen-b.ws-buffer_unmap(shader-bo-cs_buf);
 + return 0;
 +}
 +
  int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
 *shader)
  {
   const struct radeon_shader_binary *binary = shader-binary;
   unsigned i;
 - unsigned code_size;
 - unsigned char *ptr;
   bool dump  = r600_can_dump_shader(sscreen-b,
   shader-selector ? shader-selector-tokens : NULL);
  
   si_shader_binary_read_config(sscreen, shader, 0);
 + si_shader_binary_upload(sscreen, shader);
  
   if (dump) {
   if (!binary-disassembled) {
 @@ -2713,26 +2738,6 @@ int si_shader_binary_read(struct si_screen *sscreen, 
 struct si_shader *shader)
   shader-num_sgprs, shader-num_vgprs, binary-code_size,
   shader-lds_size, shader-scratch_bytes_per_wave);
   }
 -
 - /* copy new shader */
 - code_size = binary-code_size + binary-rodata_size;
 - r600_resource_reference(shader-bo, NULL);
 - shader-bo = si_resource_create_custom(sscreen-b.b, 
 PIPE_USAGE_IMMUTABLE,
 -code_size);
 - if (shader-bo == NULL) {
 - return -ENOMEM;
 - }
 -
 -
 - ptr = sscreen-b.ws-buffer_map(shader-bo-cs_buf, NULL, 
 PIPE_TRANSFER_READ_WRITE);
 - util_memcpy_cpu_to_le32(ptr, binary-code, binary-code_size);
 - if (binary-rodata_size  0) {
 - ptr += binary-code_size;
 - util_memcpy_cpu_to_le32(ptr, binary-rodata, 
 binary-rodata_size);
 - }
 -
 - sscreen-b.ws-buffer_unmap(shader-bo-cs_buf);
 -
   return 0;
  }
  
 diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
 b/src/gallium/drivers/radeonsi/si_shader.h
 index 1e8b52b..c12782f 100644
 --- a/src/gallium/drivers/radeonsi/si_shader.h
 +++ b/src/gallium/drivers/radeonsi/si_shader.h
 @@ -191,6 +191,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
 si_shader *shader,
   LLVMTargetMachineRef tm, LLVMModuleRef mod);
  void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
  unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned 
 index);
 +int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
 *shader);
  int si_shader_binary_read(struct si_screen *sscreen, struct si_shader 
 *shader);
  void si_shader_apply_scratch_relocs(struct si_context *sctx,
   struct si_shader *shader,
 diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
 b/src/gallium/drivers/radeonsi/si_state_shaders.c
 index a842d9d..12bce9a 100644
 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
 +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
 @@ -749,7 +749,6 @@ static unsigned si_update_scratch_buffer(struct 
 si_context *sctx,
  {
   struct si_shader *shader;
   uint64_t scratch_va = sctx-scratch_buffer-gpu_address;
 - unsigned char *ptr;
  
   if (!sel)
   return 0;
 @@ -770,12 +769,7 @@ static unsigned si_update_scratch_buffer(struct 
 si_context *sctx,
   si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
  
   /* Replace the shader bo with a new bo that has the relocs applied. */
 - r600_resource_reference(shader-bo, NULL);
 - 

Re: [Mesa-dev] [PATCH] i965/cs: Use dispatch width of 8 for cs terminate

2015-07-16 Thread Jordan Justen
On 2015-07-16 13:59:45, Jason Ekstrand wrote:
On Jul 16, 2015 2:00 PM, Jordan Justen jordan.l.jus...@intel.com
wrote:

 This prevents an assertion failure in brw_fs_live_variables.cpp,
 fs_live_variables::setup_one_write: Assertion `var < num_vars' failed.
 
Best guess is that you should just fix regs_read to return the right value
(1 in this case).  Most other send instructions use mlen but that may not
be needed for CS_TERMINATE.

I think regs_read will fix a similar assert in setup_one_read. The MOV
will still hit the setup_one_write assert. Should I be generating the
MOV differently?

For example

   bld.group(8, 0).exec_all().MOV(payload, g0);

works in combo with an update to regs_read.

-Jordan

 Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
 Cc: Jason Ekstrand ja...@jlekstrand.net
 ---
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 94d6a58..62dfb9a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -1960,11 +1960,12 @@ fs_visitor::emit_cs_terminate()
  */
 struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
BRW_REGISTER_TYPE_UD);
 fs_reg payload = fs_reg(GRF, alloc.allocate(1),
BRW_REGISTER_TYPE_UD);
 -   bld.exec_all().MOV(payload, g0);
 +   const fs_builder bld8 = bld.group(8, 0);
 +   bld8.exec_all().MOV(payload, g0);

 /* Send a message to the thread spawner to terminate the thread. */
 -   fs_inst *inst = bld.exec_all()
 -  .emit(CS_OPCODE_CS_TERMINATE, reg_undef,
payload);
 +   fs_inst *inst = bld8.exec_all()
 +   .emit(CS_OPCODE_CS_TERMINATE, reg_undef,
payload);
 inst-eot = true;
  }

 --
 2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] i965/miptree: Rewrite the miptree map logic

2015-07-16 Thread Matt Turner
On Tue, Jul 14, 2015 at 9:56 AM, Ben Widawsky
benjamin.widaw...@intel.com wrote:
 This patch rewrites the logic for determining which method we are using for
 mapping a miptree. It is my intention that this patch, and the required patches
 before it, do not change functionality, or if they do, it's in very obscure and
 unobservable cases.

 I have two reasons why I decided to write this patch. The existing logic was 
 way
 too tricky. In particular, the way in which it evaluated which operation to 
 use
 was out of order - specifically when it checked to use the blitter in
 use_intel_mipree_map_blit(), part of the check is to determine if it will 
 later
 be unable to use the GTT. The other reason is to make playing with the various
 operations much easier. For example, there are some theories being thrown 
 around
 that we might actually want to use the blitter where we use the GTT today, and
 vice versa. After this patch, benchmarking those changes is much more
 straightforward.

 It's pretty difficult for me to prove there is no real change going on. I ran 
 a
 subset of my benchmarks on this though. The following benchmarks show no perf
 difference on BDW with ministat with n=5 and CI=.95:
 OglBatch7
 OglDeferred
 OglFillPixel
 OglGeomPoint
 OglGeomTriList
 OglHdrBloom
 OglPSBump2
 OglPSPhong
 OglPSPom
 OglShMapPcf
 OglTerrainFlyInst
 OglTexMem512
 OglVSDiffuse8
 OglVSInstancing
 OglZBuffer
 plot3d
 trex

 It's important to point out that much of the changes effect non-LLC platform,

s/effect/affect/

 and I do not yet have data for that. I'll be collecting it over the next few
 days, but I figure this patch can get some comments meanwhile.

 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 76 
 +--
  1 file changed, 37 insertions(+), 39 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 2788270..545fbf3 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -2283,6 +2283,8 @@ intel_miptree_unmap_movntdqa(struct brw_context *brw,
 map-buffer = NULL;
 map-ptr = NULL;
  }
 +#else
 +#define intel_miptree_map_movntdqa(x,y,z,w,a) abort()

Yuck.

  #endif

  static void
 @@ -2621,36 +2623,6 @@ can_blit_slice(struct brw_context *brw,
 return true;
  }

 -static bool
 -use_intel_mipree_map_blit(struct brw_context *brw,
 -  struct intel_mipmap_tree *mt,
 -  GLbitfield mode,
 -  unsigned int level,
 -  unsigned int slice)
 -{
 -   if (brw-has_llc 
 -   !(mode  GL_MAP_WRITE_BIT) 
 -   can_blit_slice(brw, mt, level, slice))
 -  return true;
 -
 -   if (mt-tiling != I915_TILING_NONE 
 -   mt-bo-size = brw-max_gtt_map_object_size) {
 -  /* XXX: This assertion is actually the final condition for platforms
 -   * without SSE4.1.  Returning false is not the right thing to do with
 -   * the current code. On those platforms, the goal of this function is 
 to give
 -   * preference to the GTT, and at this point we've determined we cannot 
 use
 -   * the GTT, and we cannot blit, so we are out of options.
 -   *
 -   * NOTE: It should be possible to actually handle the case, but AFAIK, 
 we
 -   * never get this assertion.
 -   */
 -  assert(can_blit_slice(brw, mt, level, slice));
 -  return true;
 -   }
 -
 -   return false;
 -}
 -
  /**
   * Parameter \a out_stride has type ptrdiff_t not because the buffer stride 
 may
   * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
 @@ -2706,18 +2678,44 @@ intel_miptree_map(struct brw_context *brw,
goto done;
 }

 -   if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
 -  intel_miptree_map_blit(brw, mt, map, level, slice);
 +   /* First determine what the available option are, then pick from the best
 +* option based on the platform.
 +*/
 +   bool can_hw_blit = can_blit_slice(brw, mt, level, slice);
 +   bool can_use_gtt = mt-bo-size  brw-max_gtt_map_object_size;
  #if defined(USE_SSE41)
 -   } else if (!(mode  GL_MAP_WRITE_BIT) 
 -  !mt-compressed  cpu_has_sse4_1 
 -  (mt-pitch % 16 == 0)) {
 -  intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 +   bool can_stream_map = cpu_has_sse4_1  mt-pitch % 16 == 0;
 +#else
 +   bool can_stream_map = false;
  #endif
 -   } else {
 -  assert(mode  GL_MAP_WRITE_BIT == 0);
 -  assert(!mt-compressed);
 +
 +   if (can_stream_map) {
 +  /* BENCHMARK_ME: GTT maps for non-llc */
 +  intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 +  goto done;
 +   }

Just put this block inside the #if defined(USE_SSE41) where
can_stream_map is set and remove the abort(). I don't see any
advantage of separating them.
___

Re: [Mesa-dev] [PATCH v2] glsl: avoid compiler's segfault when processing operators with void arguments

2015-07-16 Thread Samuel Iglesias Gonsálvez


On 13/07/15 13:06, Samuel Iglesias Gonsálvez wrote:
 On 11/07/15 19:38, Renaud Gaubert wrote:
 This is done by returning an rvalue of type void in the
 ast_function_expression::hir function instead of a void expression.

 This produces (in the case of the ternary) an hir with a call
 to the void returning function and an assignment of a void variable
 which will be optimized out (the assignment) during the optimization
 pass.

 This fix results in having a valid subexpression in the many
 different cases where the subexpressions are functions whose
 return values are void.

 Thus preventing to dereference NULL in the following cases:
   * binary operator
   * unary operators
   * ternary operator
   * comparison operators (except equal and nequal operator)

 Equal and nequal had to be handled as a special case because
 instead of segfaulting on a forbidden syntax it was now accepting
 expressions with a void return value on either (or both) side of
 the expression.

 Signed-off-by: Renaud Gaubert ren...@lse.epita.fr
 Reviewed-by: Gabriel Laskar gabr...@lse.epita.fr
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=85252

 ---
 Piglit tests were sent to the Piglit mailing list:
   * glsl-1.10 Adds tests on how void functions are handled

  src/glsl/ast_function.cpp | 9 -
  src/glsl/ast_to_hir.cpp   | 9 -
  2 files changed, 16 insertions(+), 2 deletions(-)

 diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
 index 92e26bf..ac32723 100644
 --- a/src/glsl/ast_function.cpp
 +++ b/src/glsl/ast_function.cpp
 @@ -1785,7 +1785,14 @@ ast_function_expression::hir(exec_list *instructions,
   /* an error has already been emitted */
   value = ir_rvalue::error_value(ctx);
} else {
 - value = generate_call(instructions, sig, actual_parameters, state);
 +value = generate_call(instructions, sig, actual_parameters, state);
 +if (!value) {
 +   ir_variable *const tmp = new(ctx) 
 ir_variable(glsl_type::void_type,
 + void_var,
 + ir_var_temporary);
 +   instructions-push_tail(tmp);
 +   value = new(ctx) ir_dereference_variable(tmp);
 +}
 
 Indention. You used two spaces indention instead of three.
 
 With that fixed,
 
Reviewed-by: Samuel Iglesias Gonsálvez sigles...@igalia.com
 
 If you don't have commit rights, I can fix the indention, add the R-b
 and push the patch to master (no need of v3 patch). Just tell me to do
 so (*).
 

Pushed! :)

Sam

 Thanks for your contribution!
 
 Sam
 
 (*) Also for your piglit patch. If nobody has pushed your piglit patch
 yet, I can do it.
 
}
  
return value;
 diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp
 index 8cb46be..0d0ad2a 100644
 --- a/src/glsl/ast_to_hir.cpp
 +++ b/src/glsl/ast_to_hir.cpp
 @@ -1270,7 +1270,14 @@ ast_expression::do_hir(exec_list *instructions,
 *applied to one operand that can make them match, in which
 *case this conversion is done.
 */
 -  if ((!apply_implicit_conversion(op[0]-type, op[1], state)
 +
 +  if (op[0]-type == glsl_type::void_type || op[1]-type == 
 glsl_type::void_type) {
 + _mesa_glsl_error( loc, state, `%s':  wrong operand types: 
 + no operation `%1$s' exists that takes a left-hand 
 
 + operand of type 'void' or a right operand of type 
 
 + 'void', (this-oper == ast_equal) ? == : !=);
 + error_emitted = true;
 +  } else if ((!apply_implicit_conversion(op[0]-type, op[1], state)
  !apply_implicit_conversion(op[1]-type, op[0], state))
|| (op[0]-type != op[1]-type)) {
   _mesa_glsl_error( loc, state, operands of `%s' must have the 
 same 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Register spilling issues in the NIR-vec4 backend

2015-07-16 Thread Iago Toral
On Wed, 2015-07-15 at 11:02 -0700, Connor Abbott wrote:
 On Wed, Jul 15, 2015 at 7:49 AM, Iago Toral ito...@igalia.com wrote:
  Hi,
 
  when we sent the patches for the new nir-vec4 backend we mentioned that
  we had a few dEQP tests that would fail to link because of register
  spilling. Now that we have added GS support we see a few instances of
  this problem popping up in a few GS piglit tests too, for example this
  one:
 
  tests/spec/glsl-1.50/execution/variable-indexing/gs-input-array-vec4-index-rd.shader_test
 
  I have been looking into what is going on with these tests and I came to
  the conclusion that the problem is a consequence of various factors, but
  probably the main thing contributing to it is the way our SSA pass
  works. That said, I am not that experienced with NIR, so it could also
  be that my analysis is missing something and I am just arriving at the wrong
  conclusions, so I'll explain my thoughts below and hopefully someone
  else with more NIR experience can jump in and confirm or reject my
  analysis.
 
  The GS code in that test looks like this:
 
  for (int p = 0; p  3; p++) {
 color = ((index = ins[p].m1.length() ?
  ins[p].m2[index-ins[p].m1.length()] :
  ins[p].m1[index]) == expect) ?
 vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
 gl_Position = gl_in[p].gl_Position;
 EmitVertex();
  }
 
  One thing that is immediately contributing to the register pressure is
  some really awful code generated because of the indirect array indexing
  on the inputs inside the loop. This is because of the
  lower_variable_index_to_cond_assign lowering pass called from
  brw_shader.cpp. This pass will convert that color assignment into a
  bunch of nested if/else statements which makes the generated GLSL IR
  code rather large, involving plenty of temporaries too. This is only
  made worse by the fact that loop unrolling will replicate that 3 times.
  The result is a huge pile of GLSL IR with a few dozens of nested if/else
  statements and temporaries that looks like [1] (that is only a fragment
  of the GLSL IR).
 
  One thing that is particularly relevant in that code is that it has
  multiple conditional assignments to the same variable
  (dereference_array_value) as a consequence of this lowering pass.
 
  That much, however, is common to the NIR and non-NIR paths. The problem
  in the NIR case is that all these assignments generate new SSA values,
  which then become new registers in the final NIR form. This leads to NIR
  code like [2].  In contrast, the old vec4 visitor path, is able to have
  writes to the same variable write to the same register.
 
  As a result, if I print the code right before register allocation in the
  NIR path [3] and I compare that to what we get with the old vec4 visitor
  path at that same point [4], it is clearly visible that this difference
  is allowing the vec4 visitor path to reduce register pressure (see how
  in [4] we have multiple writes to vgrf5, while in [3] we always write to
  a new vgrf every time).
 
  So, am I missing something or is this kind of result expected with NIR
  programs? Is there anything in the nir-vec4 pass that we can do to fix
  this or does this need to be fixed when going out of SSA mode inside NIR?
 
  Iago
 
  [1] http://pastebin.com/5uA8ex2S
  [2] http://pastebin.com/pqLfvAVN
  [3] http://pastebin.com/64nSuUH8
  [4] http://pastebin.com/WCrdYxzt
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
 Hi Iago,
 
 Indeed, NIR does convert conditional writes to conditional selects --
 it's a required part of the conversion to SSA, and since our HW has a
 conditional select instruction that's just as fast as doing a
 conditional move, we haven't bothered much to try and change it back
 during out-of-SSA. However, doing this shouldn't make things worse. In
 your example, vgrf9, vgrf15, and vgrf17 all have very short live
 intervals and don't interfere with vgrf11 (unless there's another use
 of them somewhere after the snippet you pasted), which means that the
 register allocator is free to allocate the destinations of all the
 selects to the same register.
 
 What's happening, though, is that you're running into our terrible
 liveness analysis. After doing the proper liveness analysis, we figure
 out the place each register first becomes live and last becomes dead,
 and then we consider registers that have overlapping ranges to
 interfere. So we consider vgrf11 to interfere with vgrf15 and vgrf17,
 even though it really doesn't. The trouble with making it do the right
 thing is that we may actually need to extend the live ranges of
 registers when the exec masks don't match up, either because one uses
 writemask_all or because they have incompatible exec masks due to
 containing different datatypes (half-float vs. float, etc.). For
 example, in your snippet, 

Re: [Mesa-dev] [PATCH] i965/nir/fs: removed unneeded support for global variables

2015-07-16 Thread Alejandro Piñeiro
Ping. Any possibility of a patch review? FWIW, the patch itself is
pretty small.


On 26/06/15 13:47, Alejandro Piñeiro wrote:
 As functions are inlined, and nir_lower_global_vars_to_local gets
 run, all global variables are lowered to local variables.
 ---

 Jason Ekstrand already confirmed that global support is not needed
 on the bug open for the nir/vec4 support:
 https://bugs.freedesktop.org/show_bug.cgi?id=89580#c9

 So this patch just applies that answer to the fs path.

 Full piglit run. No regressions.

  src/mesa/drivers/dri/i965/brw_fs.h   |  1 -
  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 14 ++
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  1 -
  3 files changed, 2 insertions(+), 14 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
 b/src/mesa/drivers/dri/i965/brw_fs.h
 index 243baf6..c49d0f8 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs.h
 +++ b/src/mesa/drivers/dri/i965/brw_fs.h
 @@ -345,7 +345,6 @@ public:
 unsigned max_grf;
  
 fs_reg *nir_locals;
 -   fs_reg *nir_globals;
 fs_reg nir_inputs;
 fs_reg nir_outputs;
 fs_reg *nir_system_values;
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 index 59081ea..a648a5a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 @@ -55,14 +55,6 @@ fs_visitor::emit_nir_code()
  
 nir_emit_system_values(nir);
  
 -   nir_globals = ralloc_array(mem_ctx, fs_reg, nir-reg_alloc);
 -   foreach_list_typed(nir_register, reg, node, nir-registers) {
 -  unsigned array_elems =
 - reg-num_array_elems == 0 ? 1 : reg-num_array_elems;
 -  unsigned size = array_elems * reg-num_components;
 -  nir_globals[reg-index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
 -   }
 -
 /* get the main function and emit it */
 nir_foreach_overload(nir, overload) {
assert(strcmp(overload-function-name, main) == 0);
 @@ -1151,10 +1143,8 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register 
 *nir_reg,
 unsigned base_offset, nir_src *indirect)
  {
 fs_reg reg;
 -   if (nir_reg-is_global)
 -  reg = v-nir_globals[nir_reg-index];
 -   else
 -  reg = v-nir_locals[nir_reg-index];
 +
 +   reg = v-nir_locals[nir_reg-index];
  
 reg = offset(reg, base_offset * nir_reg-num_components);
 if (indirect) {
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 9a4bad6..90d5706 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -2012,7 +2012,6 @@ fs_visitor::fs_visitor(const struct brw_compiler 
 *compiler, void *log_data,
 this-no16_msg = NULL;
  
 this-nir_locals = NULL;
 -   this-nir_globals = NULL;
  
 memset(this-payload, 0, sizeof(this-payload));
 memset(this-outputs, 0, sizeof(this-outputs));

-- 
Alejandro Piñeiro (apinhe...@igalia.com)

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix comment about DRM_IOCTL_I915_GEM_WAIT.

2015-07-16 Thread Daniel Vetter
On Wed, Jul 15, 2015 at 12:20:15PM -0700, Kristian Høgsberg wrote:
 On Wed, Jul 15, 2015 at 10:22 AM, Kenneth Graunke kenn...@whitecape.org 
 wrote:
  From: Chris Wilson ch...@chris-wilson.co.uk
 
  The kernel actually waits forever when supplied a timeout value < 0,
  rather than returning immediately.  See i915_gem_wait_ioctl() in
  i915_gem.c's call to __i915_wait_request().
 
  (split by Ken from a large patch authored by Chris Wilson)
 
  Reviewed-by: Kenneth Graunke kenn...@whitecape.org
  ---
   src/mesa/drivers/dri/i965/intel_syncobj.c | 6 +++---
   1 file changed, 3 insertions(+), 3 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/intel_syncobj.c 
  b/src/mesa/drivers/dri/i965/intel_syncobj.c
  index c44c4be..c2f4fa9 100644
  --- a/src/mesa/drivers/dri/i965/intel_syncobj.c
  +++ b/src/mesa/drivers/dri/i965/intel_syncobj.c
  @@ -105,9 +105,9 @@ brw_fence_client_wait(struct brw_context *brw, struct 
  brw_fence *fence,
  assert(fence-batch_bo);
 
  /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
  -* immediately for timeouts = 0.  The best we can do is to clamp the
  -* timeout to INT64_MAX.  This limits the maximum timeout from 584 
  years to
  -* 292 years - likely not a big deal.
  +* immediately for timeout == 0, and indefinitely if timeout is 
  negative.
  +* The best we can do is to clamp the timeout to INT64_MAX.  This limits
  +* the maximum timeout from 584 years to 292 years - likely not a big 
  deal.
 
 No, there are kernel versions in the wild that have this bug. The
 comment after the patch says:
 
  /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and returns
  * immediately for timeout == 0, and indefinitely if timeout is negative.
  * The best we can do is to clamp the timeout to INT64_MAX.  This limits
  * the maximum timeout from 584 years to 292 years - likely not a big deal.
 
 which doesn't make sense. If we feel like we need to point out that
 we've fixed the bug, that's fine, but keep the part about how some
 kernels are broken so it's clear why the workaround is needed.

That fix got cc: stable'ed. If you're still running broken kernels you
don't care enough about anything that I don't think this would matter
either. It was just that it took us a few releases to spot this.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] st/dri: expose sRGB visuals (v2)

2015-07-16 Thread Emil Velikov
On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 v2: The fix for the darkness in Ubuntu Unity is in the hunk
 with the 4-line comment.
 ---
  src/gallium/state_trackers/dri/dri2.c |  2 ++
  src/gallium/state_trackers/dri/dri_drawable.c |  7 ++-
  src/gallium/state_trackers/dri/dri_screen.c   | 23 +++
  src/mesa/state_tracker/st_manager.c   |  1 +
  4 files changed, 28 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/dri2.c 
 b/src/gallium/state_trackers/dri/dri2.c
 index 1eda036..5aa785c 100644
 --- a/src/gallium/state_trackers/dri/dri2.c
 +++ b/src/gallium/state_trackers/dri/dri2.c
 @@ -189,9 +189,11 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
 */
switch(format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
 +  case PIPE_FORMAT_B8G8R8A8_SRGB:
  depth = 32;
  break;
case PIPE_FORMAT_B8G8R8X8_UNORM:
 +  case PIPE_FORMAT_B8G8R8X8_SRGB:
  depth = 24;
  break;
case PIPE_FORMAT_B5G6R5_UNORM:
Have you looked at what it'll take to get __DRI_IMAGE (dri3?) going?
From a quick look we need a couple extra defines, and some minimal
changes in the st/dri2.

__DRI_SWRAST on the other hand, might not be as straightforward.

Note that I'm not suggesting that you do either of these, merely curious.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] st/dri: enable 3D textures and sRGB colorspace for EGL

2015-07-16 Thread Emil Velikov
On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Anatoli Antonovitch anatoli.antonovi...@amd.com

 ---
  src/gallium/state_trackers/dri/dri_query_renderer.c | 14 ++
  1 file changed, 14 insertions(+)

 diff --git a/src/gallium/state_trackers/dri/dri_query_renderer.c 
 b/src/gallium/state_trackers/dri/dri_query_renderer.c
 index 4a28ac3..ea31b6c 100644
 --- a/src/gallium/state_trackers/dri/dri_query_renderer.c
 +++ b/src/gallium/state_trackers/dri/dri_query_renderer.c
 @@ -42,6 +42,20 @@ dri2_query_renderer_integer(__DRIscreen *_screen, int 
 param,
PIPE_CAP_UMA);
return 0;

 +   case __DRI2_RENDERER_HAS_TEXTURE_3D:
 +  value[0] =
 + screen-base.screen-get_param(screen-base.screen,
 +PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 0;
 +  return 0;
 +
 +   case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
 +  value[0] =
 + screen-base.screen-is_format_supported(screen-base.screen,
 +  PIPE_FORMAT_B8G8R8A8_SRGB,
From a quick look some drivers that handle PIPE_FORMAT_B8G8R8A8_SRGB
are not so keen on PIPE_FORMAT_B8G8R8X8_SRGB. Should the state-tracker
handle/have a fall-back for this (set the alpha channel to max and use
the former format), or is it a driver decision?

Cheers,
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] st/dri: expose sRGB visuals (v2)

2015-07-16 Thread Marek Olšák
On Thu, Jul 16, 2015 at 3:06 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 v2: The fix for the darkness in Ubuntu Unity is in the hunk
 with the 4-line comment.
 ---
  src/gallium/state_trackers/dri/dri2.c |  2 ++
  src/gallium/state_trackers/dri/dri_drawable.c |  7 ++-
  src/gallium/state_trackers/dri/dri_screen.c   | 23 +++
  src/mesa/state_tracker/st_manager.c   |  1 +
  4 files changed, 28 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/dri2.c 
 b/src/gallium/state_trackers/dri/dri2.c
 index 1eda036..5aa785c 100644
 --- a/src/gallium/state_trackers/dri/dri2.c
 +++ b/src/gallium/state_trackers/dri/dri2.c
 @@ -189,9 +189,11 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
 */
switch(format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
 +  case PIPE_FORMAT_B8G8R8A8_SRGB:
  depth = 32;
  break;
case PIPE_FORMAT_B8G8R8X8_UNORM:
 +  case PIPE_FORMAT_B8G8R8X8_SRGB:
  depth = 24;
  break;
case PIPE_FORMAT_B5G6R5_UNORM:
 Have you looked what it'll take for get __DRI_IMAGE (dri3?) going ?
 From a quick look we need a couple extra defines, and some minimal
 changes in the st/dri2.

I have really no idea. It shouldn't be difficult though. I had thought
Axel Davy had implemented __DRI_IMAGE, so I don't know.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/4] egl: implement EGL_KHR_gl_colorspace (v2)

2015-07-16 Thread Emil Velikov
On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 v2: add missing break

 --- a/src/egl/drivers/dri2/egl_dri2.h
 +++ b/src/egl/drivers/dri2/egl_dri2.h
 @@ -285,6 +285,8 @@ struct dri2_egl_config
 _EGLConfig base;
 const __DRIconfig *dri_single_config;
 const __DRIconfig *dri_double_config;
 +   const __DRIconfig *dri_srgb_single_config;
 +   const __DRIconfig *dri_srgb_double_config;
Nitpick: If you change the current configs to two-sized arrays, one
will be able to simplify the conditionals.
So common things like

if (srgb)
   conf-dri_srgb_single_config = dri_config;
else
   conf-dri_single_config = dri_config;

will become

conf-dri_srgb_single_config[srgb] = dri_config;


 --- a/src/egl/drivers/dri2/platform_android.c
 +++ b/src/egl/drivers/dri2/platform_android.c

 @@ -230,6 +231,9 @@ droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, 
 EGLint type,
window-query(window, NATIVE_WINDOW_HEIGHT, dri2_surf-base.Height);
 }

 +   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
 +dri2_surf-base.GLColorspace);
 +
 dri2_surf-dri_drawable =
(*dri2_dpy-dri2-createNewDrawable)(dri2_dpy-dri_screen,
dri2_conf-dri_double_config,
Please use the new config in the above call.

Thanks for sorting out the Android platform!

Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] c99_math: Implement exp2f for MSVC.

2015-07-16 Thread Jose Fonseca

On 16/07/15 05:30, Matt Turner wrote:

---
This will go in before my double promotion series which uses exp2f.

  include/c99_math.h | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/include/c99_math.h b/include/c99_math.h
index 7ed7cc2..0ca5a73 100644
--- a/include/c99_math.h
+++ b/include/c99_math.h
@@ -140,6 +140,12 @@ llrintf(float f)
 return rounded;
  }

+static inline float
+exp2f(float f)
+{
+   return powf(2.0f, f);
+}
+
  #endif /* C99 */





Looks good. Thanks.

Reviewed-by: Jose Fonseca jfons...@vmware.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/4] st/dri: enable 3D textures and sRGB colorspace for EGL

2015-07-16 Thread Marek Olšák
On Thu, Jul 16, 2015 at 3:14 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Anatoli Antonovitch anatoli.antonovi...@amd.com

 ---
  src/gallium/state_trackers/dri/dri_query_renderer.c | 14 ++
  1 file changed, 14 insertions(+)

 diff --git a/src/gallium/state_trackers/dri/dri_query_renderer.c 
 b/src/gallium/state_trackers/dri/dri_query_renderer.c
 index 4a28ac3..ea31b6c 100644
 --- a/src/gallium/state_trackers/dri/dri_query_renderer.c
 +++ b/src/gallium/state_trackers/dri/dri_query_renderer.c
 @@ -42,6 +42,20 @@ dri2_query_renderer_integer(__DRIscreen *_screen, int 
 param,
PIPE_CAP_UMA);
return 0;

 +   case __DRI2_RENDERER_HAS_TEXTURE_3D:
 +  value[0] =
 + screen-base.screen-get_param(screen-base.screen,
 +PIPE_CAP_MAX_TEXTURE_3D_LEVELS) != 
 0;
 +  return 0;
 +
 +   case __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB:
 +  value[0] =
 + screen-base.screen-is_format_supported(screen-base.screen,
 +  PIPE_FORMAT_B8G8R8A8_SRGB,
 From a quick look some drivers that handle PIPE_FORMAT_B8G8R8A8_SRGB
 are not so keen on PIPE_FORMAT_B8G8R8X8_SRGB. Should the state-tracker
 handle/have a fall-back for this (set the alpha channel to max and use
 the former format), or is it a driver decision?

If a driver doesn't support PIPE_FORMAT_B8G8R8X8_SRGB, such a DRI
config isn't even created, so I think the driver cannot receive a
visual requesting that format.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Seeking advice speeding up glsl optimisation passes for AoA

2015-07-16 Thread Timothy Arceri
On Wed, 2015-07-15 at 11:53 -0700, Eric Anholt wrote:
 Timothy Arceri t_arc...@yahoo.com.au writes:
 
  Hi guys,
  
  As I've mentioned a couple of times in previous patches some of the cts 
  AoA
  tests are taking a very long time to compile. This is due to excessive
  optimisation passes mainly in the glsl optimisations (there are some 
  slowdowns
  in the intel backend too but these seemed to go away when I tried the new 
  nir
  vec4 backend).
  
  I fixed part of the problem with this patch to do the dead code 
  elimination in
  a single pass [1]. 
  These excessive passes exist in normal shaders but it's generally not an 
  issue
  as the number of passes is generally quite low, and inexpensive. However 
  when
  you have an 8 dimensional array constantly walking this becomes quite
  expensive.
  
  The remaining issue I'm seeking some advice for is with constant
  propagation/folding.
  
  It seems for iterators used in loops you can get into a situation where 
  an
  optimisation pass is needed for each loop iteration in order to make all
  values of the iterator constant.
  
  I didn't have to look too hard to find some real-world examples of this in the public
  shader-db. For example here is it happening for a Unity shader:
 
 How about if we just disable the GLSL IR constant prop pass when NIR is
 enabled?

Thanks for the suggestion, I was hoping that would be the answer but seems we
can't do it yet. However I seem to have a solution to my problem which was
quite simple in the end.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/14] mesa: overhaul the glGetTexImage code

2015-07-16 Thread Brian Paul

On 07/15/2015 07:15 PM, Ilia Mirkin wrote:

The reason why you chose to call select_tex_image() several times
through a get_tex_image flow eludes me. Why not just get it once at
the beginning and pass it around? The old APIs also used a
gl_texture_image and not a gl_texture_object. (Obviously the object is
implicitly gettable from the image when necessary.)


It's kind of arbitrary, but I leaned toward passing a gl_texture_object 
and level instead.


Passing a gl_texture_image to getteximage_error_check(), for example, 
wouldn't allow us to get rid of the 'level' parameter since it still 
needs to be error-checked.  We could return a gl_texture_image from 
get_texture_image_dims() but it would be null in a number of situations. 
 Some situations would require raising GL_INVALID_VALUE, others 
GL_INVALID_OPERATION.


I was also hesitant to pass around a gl_texture_image pointer because in 
the case of non-array cube maps we actually care about six 
gl_texture_images.  When the code still passed an image I was often 
asking myself is this the positive x image or one specified by target 
or one specified by zoffset?  I found it easier to understand when I 
switched to a gl_texture_object.


select_tex_image() is a cheap call.  Are you concerned about the cost of 
calling it?


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] st/dri: expose sRGB visuals (v2)

2015-07-16 Thread Emil Velikov
On 16/07/15 14:38, Marek Olšák wrote:
 On Thu, Jul 16, 2015 at 3:06 PM, Emil Velikov emil.l.veli...@gmail.com 
 wrote:
 On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 v2: The fix for the darkness in Ubuntu Unity is in the hunk
 with the 4-line comment.
 ---
  src/gallium/state_trackers/dri/dri2.c |  2 ++
  src/gallium/state_trackers/dri/dri_drawable.c |  7 ++-
  src/gallium/state_trackers/dri/dri_screen.c   | 23 +++
  src/mesa/state_tracker/st_manager.c   |  1 +
  4 files changed, 28 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/dri2.c 
 b/src/gallium/state_trackers/dri/dri2.c
 index 1eda036..5aa785c 100644
 --- a/src/gallium/state_trackers/dri/dri2.c
 +++ b/src/gallium/state_trackers/dri/dri2.c
 @@ -189,9 +189,11 @@ dri2_drawable_get_buffers(struct dri_drawable 
 *drawable,
 */
switch(format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
 +  case PIPE_FORMAT_B8G8R8A8_SRGB:
  depth = 32;
  break;
case PIPE_FORMAT_B8G8R8X8_UNORM:
 +  case PIPE_FORMAT_B8G8R8X8_SRGB:
  depth = 24;
  break;
case PIPE_FORMAT_B5G6R5_UNORM:
 Have you looked at what it'll take to get __DRI_IMAGE (dri3?) going?
 From a quick look we need a couple extra defines, and some minimal
 changes in the st/dri2.
 
 I have really no idea. It shouldn't be difficult though. I had thought
 Axel Davy had implemented __DRI_IMAGE, so I don't know.
 
True he has, but there is a bit of divergence. The above path is hit for
dri2 drivers. The __DRI_IMAGE equivalent is dri_image_drawable_get_buffers.

-Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] st/dri: expose sRGB visuals (v2)

2015-07-16 Thread Marek Olšák
I'm not familiar with DRI3, but it looks like sRGB visuals should work
with DRI3 too.

Now that I think about it, I can drop the change in
dri2_drawable_get_buffers, because it's not needed with the change in
dri_drawable_get_format.

Marek

On Thu, Jul 16, 2015 at 4:08 PM, Emil Velikov emil.l.veli...@gmail.com wrote:
 On 16/07/15 14:38, Marek Olšák wrote:
 On Thu, Jul 16, 2015 at 3:06 PM, Emil Velikov emil.l.veli...@gmail.com 
 wrote:
 On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 v2: The fix for the darkness in Ubuntu Unity is in the hunk
 with the 4-line comment.
 ---
  src/gallium/state_trackers/dri/dri2.c |  2 ++
  src/gallium/state_trackers/dri/dri_drawable.c |  7 ++-
  src/gallium/state_trackers/dri/dri_screen.c   | 23 +++
  src/mesa/state_tracker/st_manager.c   |  1 +
  4 files changed, 28 insertions(+), 5 deletions(-)

 diff --git a/src/gallium/state_trackers/dri/dri2.c 
 b/src/gallium/state_trackers/dri/dri2.c
 index 1eda036..5aa785c 100644
 --- a/src/gallium/state_trackers/dri/dri2.c
 +++ b/src/gallium/state_trackers/dri/dri2.c
 @@ -189,9 +189,11 @@ dri2_drawable_get_buffers(struct dri_drawable 
 *drawable,
 */
switch(format) {
case PIPE_FORMAT_B8G8R8A8_UNORM:
 +  case PIPE_FORMAT_B8G8R8A8_SRGB:
  depth = 32;
  break;
case PIPE_FORMAT_B8G8R8X8_UNORM:
 +  case PIPE_FORMAT_B8G8R8X8_SRGB:
  depth = 24;
  break;
case PIPE_FORMAT_B5G6R5_UNORM:
 Have you looked at what it'll take to get __DRI_IMAGE (dri3?) going?
 From a quick look we need a couple extra defines, and some minimal
 changes in the st/dri2.

 I have really no idea. It shouldn't be difficult though. I had thought
 Axel Davy had implemented __DRI_IMAGE, so I don't know.

 True he has, but there is a bit of divergence. The above path is hit for
 dri2 drivers. The __DRI_IMAGE equivalent is dri_image_drawable_get_buffers.

 -Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/4] i965/fs: Fix return value of fs_inst::regs_read() for BAD_FILE.

2015-07-16 Thread Francisco Jerez
Typically BAD_FILE sources are used to mark a source as not present,
which implies that no registers are read.  This will become much more
frequent with logical send opcodes which have a large number of
sources, many of them optionally used and marked as BAD_FILE when they
aren't applicable.  It will prove to be useful to be able to rely on
the value of regs_read() regardless of whether a source is present or
not.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 09ff587..d031352 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -709,6 +709,7 @@ fs_inst::regs_read(int arg) const
 
switch (src[arg].file) {
case BAD_FILE:
+  return 0;
case UNIFORM:
case IMM:
   return 1;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Register spilling issues in the NIR-vec4 backend

2015-07-16 Thread Jason Ekstrand
On Jul 15, 2015 11:20 PM, Iago Toral ito...@igalia.com wrote:

 On Wed, 2015-07-15 at 11:02 -0700, Connor Abbott wrote:
  On Wed, Jul 15, 2015 at 7:49 AM, Iago Toral ito...@igalia.com wrote:
   Hi,
  
   when we sent the patches for the new nir-vec4 backend we mentioned
that
   we had a few dEQP tests that would fail to link because of register
   spilling. Now that we have added GS support we see a few instances of
   this problem popping up in a few GS piglit tests too, for example this
   one:
  
  
tests/spec/glsl-1.50/execution/variable-indexing/gs-input-array-vec4-index-rd.shader_test
  
   I have been looking into what is going on with these tests and I came
to
   the conclusion that the problem is a consequence of various factors,
but
   probably the main thing contributing to it is the way our SSA pass
   works. That said, I am not that experienced with NIR, so it could also
   be that my analysis is missing something and I am just arriving to
wrong
   conclusions, so I'll explain my thoughts below and hopefully someone
   else with more NIR experience can jump in and confirm or reject my
   analysis.
  
   The GS code in that test looks like this:
  
   for (int p = 0; p < 3; p++) {
      color = ((index >= ins[p].m1.length() ?
   ins[p].m2[index-ins[p].m1.length()] :
   ins[p].m1[index]) == expect) ?
  vec4(0.0, 1.0, 0.0, 1.0) : vec4(1.0, 0.0, 0.0, 1.0);
  gl_Position = gl_in[p].gl_Position;
  EmitVertex();
   }
  
   One thing that is immediately contributing to the register pressure is
   some really awful code generated because of the indirect array
indexing
   on the inputs inside the loop. This is because of the
   lower_variable_index_to_cond_assign lowering pass called from
   brw_shader.cpp. This pass will convert that color assignment into a
   bunch of nested if/else statements which makes the generated GLSL IR
   code rather large, involving plenty of temporaries too. This is only
   made worse by the fact that loop unrolling will replicate that 3
times.
   The result is a huge pile of GLSL IR with a few dozens of nested
if/else
   statements and temporaries that looks like [1] (that is only a
fragment
   of the GLSL IR).
  
   One thing that is particularly relevant in that code is that it has
   multiple conditional assignments to the same variable
   (dereference_array_value) as a consequence of this lowering pass.
  
   That much, however, is common to the NIR and non-NIR paths. The
problem
   in the NIR case is that all these assignments generate new SSA values,
   which then become new registers in the final NIR form. This leads to
NIR
   code like [2].  In contrast, the old vec4 visitor path, is able to
have
   writes to the same variable write to the same register.
  
   As a result, if I print the code right before register allocation in
the
   NIR path [3] and I compare that to what we get with the old vec4
visitor
   path at that same point [4], it is clearly visible that this
difference
   is allowing the vec4 visitor path to reduce register pressure (see how
   in [4] we have multiple writes to vgrf5, while in [3] we always write
to
   a new vgrf every time).
  
   So, am I missing something or is this kind of result expected with NIR
   programs? Is there anything in the nir-vec4 pass that we can do to
fix
   this or does this need to be fixed when going out of SSA mode inside
NIR?
  
   Iago
  
   [1] http://pastebin.com/5uA8ex2S
   [2] http://pastebin.com/pqLfvAVN
   [3] http://pastebin.com/64nSuUH8
   [4] http://pastebin.com/WCrdYxzt
  
   ___
   mesa-dev mailing list
   mesa-dev@lists.freedesktop.org
   http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
  Hi Iago,
 
  Indeed, NIR does convert conditional writes to conditional selects --
  it's a required part of the conversion to SSA, and since our HW has a
  conditional select instruction that's just as fast as doing a
  conditional move, we haven't bothered much to try and change it back
  during out-of-SSA. However, doing this shouldn't make things worse. In
  your example, vgrf9, vgrf15, and vgrf17 all have very short live
  intervals and don't interfere with vgrf11 (unless there's another use
  of them somewhere after the snippet you pasted), which means that the
  register allocator is free to allocate the destinations of all the
  selects to the same register.
 
  What's happening, though, is that you're running into our terrible
  liveness analysis. After doing the proper liveness analysis, we figure
  out the place each register first becomes live and last becomes dead,
  and then we consider registers that have overlapping ranges to
  interfere. So we consider vgrf11 to interfere with vgrf15 and vgrf17,
  even though it really doesn't. The trouble with making it do the right
  thing is that we may actually need to extend the live ranges of
  registers when the exec masks don't match up, either because one uses
  

[Mesa-dev] [PATCH 1/3] i965/fs: Fix stride for immediate registers.

2015-07-16 Thread Francisco Jerez
When the width field was removed from fs_reg the BROADCAST handling
code in opt_algebraic() started to miss a number of trivial
optimization cases resulting in the ugly indirect-addressing sequence
to be emitted unnecessarily for some variable-indexed texturing and
UBO loads regardless of one of the sources of BROADCAST being
immediate.  Apparently the reason was that we were setting the stride
field to one for immediates even though they are typically uniform.
Width used to be set to one too which is why this optimization used to
work previously until the reg.width == 1 check was removed.

The stride field of vector immediates is intentionally left equal to
one, because they are strictly speaking not uniform.  The assertion in
fs_generator makes sure that immediates have the expected stride as
consistency check.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 3 +++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 
 2 files changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ff0675d..537ccbe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -362,6 +362,7 @@ fs_reg::fs_reg(float f)
init();
this-file = IMM;
this-type = BRW_REGISTER_TYPE_F;
+   this-stride = 0;
this-fixed_hw_reg.dw1.f = f;
 }
 
@@ -371,6 +372,7 @@ fs_reg::fs_reg(int32_t i)
init();
this-file = IMM;
this-type = BRW_REGISTER_TYPE_D;
+   this-stride = 0;
this-fixed_hw_reg.dw1.d = i;
 }
 
@@ -380,6 +382,7 @@ fs_reg::fs_reg(uint32_t u)
init();
this-file = IMM;
this-type = BRW_REGISTER_TYPE_UD;
+   this-stride = 0;
this-fixed_hw_reg.dw1.ud = u;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index bae7216..8a3af47 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -79,6 +79,10 @@ brw_reg_from_fs_reg(fs_inst *inst, fs_reg *reg)
   brw_reg = byte_offset(brw_reg, reg-subreg_offset);
   break;
case IMM:
+  assert(reg-stride == (reg-type == BRW_REGISTER_TYPE_V ||
+ reg-type == BRW_REGISTER_TYPE_UV ||
+ reg-type == BRW_REGISTER_TYPE_VF ? 1 : 0));
+
   switch (reg-type) {
   case BRW_REGISTER_TYPE_F:
 brw_reg = brw_imm_f(reg-fixed_hw_reg.dw1.f);
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] i965: Fix stride field for the result of emit_uniformize().

2015-07-16 Thread Francisco Jerez
This is essentially the same problem fixed in an earlier patch for
immediates.  Setting the stride to zero will be particularly useful
for my future SIMD lowering pass, because we will be able to just
check whether the stride of a source register is zero and skip
emitting the copies required to unzip it in that case.

Instead of setting stride to zero in every caller of emit_uniformize()
I've changed the function to return the result as its return value
(previously it was being written into a caller-provided destination
register), because this way we can enforce that the result is used with
the correct regioning from the function itself.

The changes to the prototype of its VEC4 counterpart are mainly for
the sake of symmetry, VEC4 registers don't have stride.
---
 src/mesa/drivers/dri/i965/brw_fs_builder.h | 16 +---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  4 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h   |  7 +--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 18 ++
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h 
b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index 34646d7..ab4680f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -350,17 +350,19 @@ namespace brw {
   }
 
   /**
-   * Copy any live channel from \p src to the first channel of \p dst.
+   * Copy any live channel from \p src to the first channel of the result.
*/
-  void
-  emit_uniformize(const dst_reg dst, const src_reg src) const
+  src_reg
+  emit_uniformize(const src_reg src) const
   {
  const fs_builder ubld = exec_all();
- const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
+ const dst_reg chan_index = component(vgrf(BRW_REGISTER_TYPE_UD), 0);
+ const dst_reg dst = component(vgrf(src.type), 0);
+
+ ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+ ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index);
 
- ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0));
- ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0),
-   src, component(chan_index, 0));
+ return dst;
   }
 
   /**
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 3099dc4..4e45118 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1386,7 +1386,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder bld, 
nir_intrinsic_instr *instr
  surf_index = vgrf(glsl_type::uint_type);
  bld.ADD(surf_index, get_nir_src(instr-src[0]),
  fs_reg(stage_prog_data-binding_table.ubo_start));
- bld.emit_uniformize(surf_index, surf_index);
+ surf_index = bld.emit_uniformize(surf_index);
 
  /* Assume this may touch any UBO. It would be nice to provide
   * a tighter bound, but the array information is already lowered away.
@@ -1681,7 +1681,7 @@ fs_visitor::nir_emit_texture(const fs_builder bld, 
nir_tex_instr *instr)
  /* Emit code to evaluate the actual indexing expression */
  sampler_reg = vgrf(glsl_type::uint_type);
  bld.ADD(sampler_reg, src, fs_reg(sampler));
- bld.emit_uniformize(sampler_reg, sampler_reg);
+ sampler_reg = bld.emit_uniformize(sampler_reg);
  break;
   }
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 3643651..7bf027a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -293,8 +293,11 @@ public:
void emit_lrp(const dst_reg dst,
  const src_reg x, const src_reg y, const src_reg a);
 
-   /** Copy any live channel from \p src to the first channel of \p dst. */
-   void emit_uniformize(const dst_reg dst, const src_reg src);
+   /**
+* Copy any live channel from \p src to the first channel of the
+* result.
+*/
+   src_reg emit_uniformize(const src_reg src);
 
void emit_block_move(dst_reg *dst, src_reg *src,
 const struct glsl_type *type, brw_predicate predicate);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index f351bf4..a6eee47 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1374,15 +1374,19 @@ vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
   emit(pull);
 }
 
-void
-vec4_visitor::emit_uniformize(const dst_reg dst, const src_reg src)
+src_reg
+vec4_visitor::emit_uniformize(const src_reg src)
 {
const src_reg chan_index(this, glsl_type::uint_type);
+   const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
+  src.type);
 
emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
 

[Mesa-dev] [PATCH 2/3] i965/fs: Fix stride field for uniforms.

2015-07-16 Thread Francisco Jerez
This fixes essentially the same problem as for immediates.  Registers
of the UNIFORM file are typically accessed according to the formula:

 read_uniform(r, channel_index, array_index) =
read_element(r, channel_index * 0 + array_index * 1)

Which matches the general direct addressing formula for stride=0:

 read_direct(r, channel_index, array_index) =
read_element(r, channel_index * stride +
array_index * max{1, stride * width})

In either case if reladdr is present the access will be according to
the composition of two register regions, the first one determining the
per-channel array_index used for the second, like:

 read_indirect(r, channel_index, array_index) =
read_direct(r, channel_index,
read(r.reladdr, channel_index, array_index))

where:
 read(r, channel_index, array_index) = if r.reladdr == NULL
then read_direct(r, channel_index, array_index)
else read_indirect(r, channel_index, array_index)

In conclusion we can handle uniforms consistently with the other
register files if we set stride to zero.  After lowering to a GRF
using VARYING_PULL_CONSTANT_LOAD in demote_pull_constant_loads() the
stride of the source is set to one again because the result of
VARYING_PULL_CONSTANT_LOAD is generally non-uniform.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 537ccbe..2e835b1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -819,6 +819,7 @@ fs_reg::fs_reg(enum register_file file, int reg)
this-file = file;
this-reg = reg;
this-type = BRW_REGISTER_TYPE_F;
+   this-stride = (file == UNIFORM ? 0 : 1);
 }
 
 /** Fixed HW reg constructor. */
@@ -828,6 +829,7 @@ fs_reg::fs_reg(enum register_file file, int reg, enum 
brw_reg_type type)
this-file = file;
this-reg = reg;
this-type = type;
+   this-stride = (file == UNIFORM ? 0 : 1);
 }
 
 /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch.
@@ -1268,6 +1270,7 @@ fs_visitor::assign_curb_setup()
  constant_nr / 8,
  constant_nr % 8);
 
+assert(inst-src[i].stride == 0);
inst-src[i].file = HW_REG;
inst-src[i].fixed_hw_reg = byte_offset(
retype(brw_reg, inst-src[i].type),
@@ -1818,6 +1821,8 @@ fs_visitor::demote_pull_constants()
  fs_reg 
surf_index(stage_prog_data-binding_table.pull_constants_start);
  fs_reg dst = vgrf(glsl_type::float_type);
 
+ assert(inst-src[i].stride == 0);
+
  /* Generate a pull load into dst. */
  if (inst-src[i].reladdr) {
 VARYING_PULL_CONSTANT_LOAD(ibld, dst,
@@ -1825,6 +1830,7 @@ fs_visitor::demote_pull_constants()
*inst-src[i].reladdr,
pull_index);
 inst-src[i].reladdr = NULL;
+inst-src[i].stride = 1;
  } else {
            fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15);
 ibld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/4] i965/fs: Add stub lowering pass for logical send-message opcodes.

2015-07-16 Thread Francisco Jerez
This pass will house ad-hoc lowering code for several send
message-like virtual opcodes that will represent their logically
independent arguments as separate instruction sources rather than as a
single payload blob.  This pass will basically just take the separate
arguments that are supposed to be part of the payload and concatenate
them to construct a message in the form required by the hardware.
Virtual instructions in separate-source form will eventually allow
some simplification of the visitor code and make several
transformations easier like lowering SIMD16 instructions to SIMD8
algorithmically in cases where the hardware doesn't support the former
natively.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 29 -
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 +
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 2e835b1..09ff587 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3179,6 +3179,30 @@ fs_visitor::lower_integer_multiplication()
return progress;
 }
 
+bool
+fs_visitor::lower_logical_sends()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+  const fs_builder ibld = bld.exec_all(inst-force_writemask_all)
+ .group(inst-exec_size, inst-force_sechalf)
+ .at(block, inst);
+
+  switch (inst-opcode) {
+  default:
+ continue;
+  }
+
+  progress = true;
+   }
+
+   if (progress)
+  invalidate_live_intervals();
+
+   return progress;
+}
+
 void
 fs_visitor::dump_instructions()
 {
@@ -3626,9 +3650,12 @@ fs_visitor::optimize()
   backend_shader::dump_instructions(filename);
}
 
-   bool progress;
+   bool progress = false;
int iteration = 0;
int pass_num = 0;
+
+   OPT(lower_logical_sends);
+
do {
   progress = false;
   pass_num = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index c005666..f3850d1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -182,6 +182,7 @@ public:
void no16(const char *msg);
void lower_uniform_pull_constant_loads();
bool lower_load_payload();
+   bool lower_logical_sends();
bool lower_integer_multiplication();
bool opt_combine_constants();
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/4] i965/fs: Add builder emit method taking a variable number of source registers.

2015-07-16 Thread Francisco Jerez
And start using it in fs_builder::LOAD_PAYLOAD().  This will be used
to emit logical send message opcodes which have an unusually large
number of arguments.
---
 src/mesa/drivers/dri/i965/brw_fs_builder.h | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h 
b/src/mesa/drivers/dri/i965/brw_fs_builder.h
index ab4680f..1dba66f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_builder.h
+++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h
@@ -307,6 +307,17 @@ namespace brw {
   }
 
   /**
+   * Create and insert an instruction with a variable number of sources
+   * into the program.
+   */
+  instruction *
+  emit(enum opcode opcode, const dst_reg dst, const src_reg srcs[],
+   unsigned n) const
+  {
+ return emit(instruction(opcode, dispatch_width(), dst, srcs, n));
+  }
+
+  /**
* Insert a preallocated instruction into the program.
*/
   instruction *
@@ -518,9 +529,7 @@ namespace brw {
   LOAD_PAYLOAD(const dst_reg dst, const src_reg *src,
unsigned sources, unsigned header_size) const
   {
- instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD,
-  dispatch_width(), dst,
-  src, sources));
+ instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, 
sources);
  inst-header_size = header_size;
  inst-regs_written = header_size +
   (sources - header_size) * (dispatch_width() / 8);
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/4] i965/fs: Implement pass to lower instructions of unsupported SIMD width.

2015-07-16 Thread Francisco Jerez
This lowering pass implements an algorithm to expand SIMDN
instructions into a sequence of SIMDM instructions in cases where the
hardware doesn't support the original execution size natively for some
particular instruction.  The most important use-cases are:

 - Lowering send message instructions that don't support SIMD16
   natively into SIMD8 (several texturing, framebuffer write and typed
   surface operations).

 - Lowering messages that don't support SIMD8 natively into SIMD16
   (*cough*gen4*cough*).

 - 64-bit precision operations (e.g. FP64 and 64-bit integer
   multiplication).

 - SIMD32.

The algorithm works by splitting the sources of the original
instruction into chunks of width appropriate for the lowered
instructions, and then interleaving the results component-wise into
the destination of the original instruction.  The pass is controlled
by the get_lowered_simd_width() function that currently just returns
the original execution size making the whole pass a no-op for the
moment until some user is introduced.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 142 +++
 src/mesa/drivers/dri/i965/brw_fs.h   |   1 +
 2 files changed, 143 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d031352..eeb6938 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3204,6 +3204,147 @@ fs_visitor::lower_logical_sends()
return progress;
 }
 
+/**
+ * Get the closest native SIMD width supported by the hardware for instruction
+ * \p inst.  The instruction will be left untouched by
+ * fs_visitor::lower_simd_width() if the returned value is equal to the
+ * original execution size.
+ */
+static unsigned
+get_lowered_simd_width(const struct brw_device_info *devinfo,
+   const fs_inst *inst)
+{
+   switch (inst-opcode) {
+   default:
+  return inst-exec_size;
+   }
+}
+
+/**
+ * The \p rows array of registers represents a \p num_rows by \p num_columns
+ * matrix in row-major order, write it in column-major order into the register
+ * passed as destination.  \p stride gives the separation between matrix
+ * elements in the input in fs_builder::dispatch_width() units.
+ */
+static void
+emit_transpose(const fs_builder bld,
+   const fs_reg dst, const fs_reg *rows,
+   unsigned num_rows, unsigned num_columns, unsigned stride)
+{
+   fs_reg *const components = new fs_reg[num_rows * num_columns];
+
+   for (unsigned i = 0; i  num_columns; ++i) {
+  for (unsigned j = 0; j  num_rows; ++j)
+ components[num_rows * i + j] = offset(rows[j], bld, stride * i);
+   }
+
+   bld.LOAD_PAYLOAD(dst, components, num_rows * num_columns, 0);
+
+   delete[] components;
+}
+
+bool
+fs_visitor::lower_simd_width()
+{
+   bool progress = false;
+
+   foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+  const unsigned lower_width = get_lowered_simd_width(devinfo, inst);
+
+  if (lower_width != inst-exec_size) {
+ /* Builder matching the original instruction. */
+ const fs_builder ibld = bld.at(block, inst)
+.exec_all(inst-force_writemask_all)
+.group(inst-exec_size, 
inst-force_sechalf);
+
+ /* Split the copies in chunks of the execution width of either the
+  * original or the lowered instruction, whichever is lower.
+  */
+ const unsigned copy_width = MIN2(lower_width, inst-exec_size);
+ const unsigned n = inst-exec_size / copy_width;
+ const unsigned dst_size = inst-regs_written * REG_SIZE /
+inst-dst.component_size(inst-exec_size);
+ fs_reg dsts[4];
+
+ assert(n  0  n = ARRAY_SIZE(dsts) 
+!inst-writes_accumulator  !inst-mlen);
+
+ for (unsigned i = 0; i  n; i++) {
+/* Emit a copy of the original instruction with the lowered width.
+ * If the EOT flag was set throw it away except for the last
+ * instruction to avoid killing the thread prematurely.
+ */
+fs_inst tmp_inst = *inst;
+tmp_inst.exec_size = lower_width;
+tmp_inst.eot = inst-eot  i == n - 1;
+
+/* Set exec_all if the lowered width is higher than the original
+ * to avoid breaking the compiler invariant that no control
+ * flow-masked instruction is wider than the shader's
+ * dispatch_width.  Then emit the lowered instruction.
+ */
+const fs_builder lbld = ibld.exec_all(lower_width  
inst-exec_size)
+.group(lower_width, i);
+fs_inst *split_inst = lbld.emit(tmp_inst);
+
+for (unsigned j = 0; j  inst-sources; j++) {
+   if (inst-src[j].file != BAD_FILE 
+   !is_uniform(inst-src[j])) {
+  /* Get the i-th copy_width-wide chunk of 

[Mesa-dev] [PATCH 04/12] i965/fs: Fix slight layering violation in emit_single_fb_writes().

2015-07-16 Thread Francisco Jerez
In cases where the color0 argument wasn't being provided,
emit_single_fb_writes() would take the alpha channel directly from the
visitor state instead of taking it from its arguments.  This sort of
hack didn't fit nicely into the logical send-message approach because
all parameters of the instruction have to be visible to the SIMD
lowering pass for it to be able to split them into halves at all.

Fix it by using LOAD_PAYLOAD in fs_visitor::emit_fb_writes() to
provide an actual color0 vector with undefined contents except for the
alpha component to match the previous behavior when no color buffers
are enabled.
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 94d6a58..6564d5f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1551,17 +1551,7 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
 
payload_header_size = length;
 
-   if (color0.file == BAD_FILE) {
-  /* Even if there's no color buffers enabled, we still need to send
-   * alpha out the pipeline to our null renderbuffer to support
-   * alpha-testing, alpha-to-coverage, and so on.
-   */
-  if (this-outputs[0].file != BAD_FILE)
- setup_color_payload(sources[length + 3],
- offset(this-outputs[0], bld, 3),
- 1, exec_size, false);
-  length += 4;
-   } else if (color1.file == BAD_FILE) {
+   if (color1.file == BAD_FILE) {
   if (src0_alpha.file != BAD_FILE) {
  setup_color_payload(sources[length], src0_alpha, 1, exec_size, 
false);
  length++;
@@ -1709,7 +1699,15 @@ fs_visitor::emit_fb_writes()
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
-  inst = emit_single_fb_write(bld, reg_undef, reg_undef, reg_undef, 0,
+  /* FINISHME: Factor out this frequently recurring pattern into a
+   * helper function.
+   */
+  const fs_reg srcs[] = { reg_undef, reg_undef,
+  reg_undef, offset(this-outputs[0], bld, 3) };
+  const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 4);
+  bld.LOAD_PAYLOAD(tmp, srcs, 4, 0);
+
+  inst = emit_single_fb_write(bld, tmp, reg_undef, reg_undef, 4,
   dispatch_width);
   inst-target = 0;
}
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/12] i965/fs: Move up prog_data->uses_omask assignment up to brw_codegen_wm_prog().

2015-07-16 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 --
 src/mesa/drivers/dri/i965/brw_wm.c   | 3 ++-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 08d9abf..c489010 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1535,8 +1535,6 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
   length++;
}
 
-   prog_data-uses_omask =
-  prog-OutputsWritten  BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
if (prog_data-uses_omask) {
   assert(this-sample_mask.file != BAD_FILE);
   /* Hand over gl_SampleMask. Only lower 16 bits are relevant.  Since
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 592a729..b590b17 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -181,7 +181,8 @@ brw_codegen_wm_prog(struct brw_context *brw,
 * so the shader definitely kills pixels.
 */
prog_data.uses_kill = fp-program.UsesKill || key-alpha_test_func;
-
+   prog_data.uses_omask =
+  fp-program.Base.OutputsWritten  
BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK);
prog_data.computed_depth_mode = computed_depth_mode(fp-program);
 
/* Use ALT floating point mode for ARB programs so that 0^0 == 1. */
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/12] i965/fs: Honour the instruction force_sechalf and exec_size fields for FB writes.

2015-07-16 Thread Francisco Jerez
We were previously guessing the half based on the EOT flag which seems
rather gross.
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8a3af47..e94f34e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -221,11 +221,11 @@ fs_generator::fire_fb_write(fs_inst *inst,
if (inst-opcode == FS_OPCODE_REP_FB_WRITE)
   msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED;
else if (prog_data-dual_src_blend) {
-  if (dispatch_width == 8 || !inst-eot)
+  if (!inst-force_sechalf)
  msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
   else
  msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23;
-   } else if (dispatch_width == 16)
+   } else if (inst-exec_size == 16)
   msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
else
   msg_control = 
BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/12] i965/fs: Define logical framebuffer write opcode.

2015-07-16 Thread Francisco Jerez
The logical variant is largely equivalent to the original opcode but
instead of taking a single payload source it expects the arguments
that make up the payload separately as individual sources, like:

 fb_write_logical null, color0, color1, src0_alpha,
src_depth, dst_depth, sample_mask, num_components

This patch defines the opcode and usual instruction boilerplate,
including a placeholder lowering function provided mostly as
self-documentation of its source registers.
---
 src/mesa/drivers/dri/i965/brw_defines.h  |  1 +
 src/mesa/drivers/dri/i965/brw_fs.cpp | 34 
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp |  2 ++
 4 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 5bf53e3..65685a9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -875,6 +875,7 @@ enum opcode {
 * instructions.
 */
FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_FB_WRITE_LOGICAL,
FS_OPCODE_BLORP_FB_WRITE,
FS_OPCODE_REP_FB_WRITE,
SHADER_OPCODE_RCP,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index eeb6938..ae050b7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -701,6 +701,13 @@ fs_inst::regs_read(int arg) const
  return 1;
   break;
 
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+  assert(src[6].file == IMM);
+  /* First/second FB write color. */
+  if (arg  2)
+ components = src[6].fixed_hw_reg.dw1.ud;
+  break;
+
default:
   if (is_tex()  arg == 0  src[0].file == GRF)
  return mlen;
@@ -3180,6 +3187,25 @@ fs_visitor::lower_integer_multiplication()
return progress;
 }
 
+static void
+lower_fb_write_logical_send(const fs_builder bld, fs_inst *inst,
+const brw_wm_prog_data *prog_data,
+const brw_wm_prog_key *key,
+const fs_visitor::thread_payload payload)
+{
+   assert(inst-src[6].file == IMM);
+   const brw_device_info *devinfo = bld.shader-devinfo;
+   const fs_reg color0 = inst-src[0];
+   const fs_reg color1 = inst-src[1];
+   const fs_reg src0_alpha = inst-src[2];
+   const fs_reg src_depth = inst-src[3];
+   const fs_reg dst_depth = inst-src[4];
+   fs_reg sample_mask = inst-src[5];
+   const unsigned components = inst-src[6].fixed_hw_reg.dw1.ud;
+
+   assert(!Not implemented);
+}
+
 bool
 fs_visitor::lower_logical_sends()
 {
@@ -3191,6 +3217,14 @@ fs_visitor::lower_logical_sends()
  .at(block, inst);
 
   switch (inst-opcode) {
+  case FS_OPCODE_FB_WRITE_LOGICAL:
+ assert(stage == MESA_SHADER_FRAGMENT);
+ lower_fb_write_logical_send(ibld, inst,
+ (const brw_wm_prog_data *)prog_data,
+ (const brw_wm_prog_key *)key,
+ payload);
+ break;
+
   default:
  continue;
   }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9582648..3533d38 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -387,7 +387,7 @@ public:
fs_reg result;
 
/** Register numbers for thread payload fields. */
-   struct {
+   struct thread_payload {
   uint8_t source_depth_reg;
   uint8_t source_w_reg;
   uint8_t aa_dest_stencil_reg;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index d66baf3..44681e4 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -533,6 +533,8 @@ brw_instruction_name(enum opcode op)
   return opcode_descs[op].name;
case FS_OPCODE_FB_WRITE:
   return fb_write;
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+  return fb_write_logical;
case FS_OPCODE_BLORP_FB_WRITE:
   return blorp_fb_write;
case FS_OPCODE_REP_FB_WRITE:
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/4] st/dri: expose sRGB visuals (v2)

2015-07-16 Thread Emil Velikov
On 16/07/15 15:31, Marek Olšák wrote:
> I'm not familiar with DRI3, but it looks like sRGB visuals should work
> with DRI3 too.
>
> Now that I think about it, I can drop the change in
> dri2_drawable_get_buffers, because it's not needed with the change in
> dri_drawable_get_format.
>
In theory one might want to update the image_format passed to the loader
(image.loader->getBuffers()) although it seems like we don't need to.

So yes, both __DRI_SWRAST and __DRI_IMAGE should just work. Wrt swrast,
perhaps we can follow up on patch 2, for the drm, wayland and x11
platforms (android doesn't have swrast support yet). If you want I can
do that.

-Emil

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/12] i965/fs: Make sure that the type sizes are compatible during copy propagation.

2015-07-16 Thread Francisco Jerez
It's surprising that we weren't checking for this already.  A future
patch will cause code like the following to be emitted:

 MOV(16) tmp<1>:uw, src
 MOV(8) dst<1>:ud, tmp<8,8,1>:ud

The second MOV comes from the expansion of a LOAD_PAYLOAD header copy,
so I don't have control over its types.  Copy propagation will happily
turn this into:

 MOV(8) dst<1>:ud, src

Which has different semantics.  Fix it by preventing propagation in
cases where a single channel of the instruction would span several
channels of the copy (this requirement could in fact be relaxed if the
copy is just a trivial memcpy, but this case is unusual enough that I
don't think it matters in practice).

I'm deliberately only checking if the type of the instruction is
larger than the original, because the converse case seems to be
handled correctly already in the code below.
---
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 54e9114..269bdb5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -339,6 +339,14 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
if (entry-src.stride * inst-src[arg].stride  4)
   return false;
 
+   /* Bail if the instruction type is larger than the execution type of the
+* copy, what implies that each channel is reading multiple channels of the
+* destination of the copy, and simply replacing the sources would give a
+* program with different semantics.
+*/
+   if (type_sz(entry-dst.type)  type_sz(inst-src[arg].type))
+  return false;
+
/* Bail if the result of composing both strides cannot be expressed
 * as another stride. This avoids, for example, trying to transform
 * this:
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/12] i965/fs: Remove the FS_OPCODE_SET_OMASK pseudo-opcode.

2015-07-16 Thread Francisco Jerez
This is now unused.
---
 src/mesa/drivers/dri/i965/brw_defines.h|  1 -
 src/mesa/drivers/dri/i965/brw_fs.h |  4 ---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 35 --
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  2 --
 4 files changed, 42 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 65685a9..9099676 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -970,7 +970,6 @@ enum opcode {
FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7,
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
-   FS_OPCODE_SET_OMASK,
FS_OPCODE_SET_SAMPLE_ID,
FS_OPCODE_SET_SIMD4X2_OFFSET,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 3533d38..1ae79a9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -496,10 +496,6 @@ private:
   struct brw_reg msg_data,
   unsigned msg_type);
 
-   void generate_set_omask(fs_inst *inst,
-   struct brw_reg dst,
-   struct brw_reg sample_mask);
-
void generate_set_sample_id(fs_inst *inst,
struct brw_reg dst,
struct brw_reg src0,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index e94f34e..2d5d352 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1364,37 +1364,6 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
brw_pop_insn_state(p);
 }
 
-/* Sets vstride=16, width=8, hstride=2 or vstride=0, width=1, hstride=0
- * (when mask is passed as a uniform) of register mask before moving it
- * to register dst.
- */
-void
-fs_generator::generate_set_omask(fs_inst *inst,
- struct brw_reg dst,
- struct brw_reg mask)
-{
-   bool stride_8_8_1 =
-(mask.vstride == BRW_VERTICAL_STRIDE_8 
- mask.width == BRW_WIDTH_8 
- mask.hstride == BRW_HORIZONTAL_STRIDE_1);
-
-   bool stride_0_1_0 = has_scalar_region(mask);
-
-   assert(stride_8_8_1 || stride_0_1_0);
-   assert(dst.type == BRW_REGISTER_TYPE_UW);
-
-   brw_push_insn_state(p);
-   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-
-   if (stride_8_8_1) {
-  brw_MOV(p, dst, retype(stride(mask, 16, 8, 2), dst.type));
-   } else if (stride_0_1_0) {
-  brw_MOV(p, dst, retype(mask, dst.type));
-   }
-   brw_pop_insn_state(p);
-}
-
 /* Sets vstride=1, width=4, hstride=0 of register src1 during
  * the ADD instruction.
  */
@@ -2074,10 +2043,6 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  brw_broadcast(p, dst, src[0], src[1]);
  break;
 
-  case FS_OPCODE_SET_OMASK:
- generate_set_omask(inst, dst, src[0]);
- break;
-
   case FS_OPCODE_SET_SAMPLE_ID:
  generate_set_sample_id(inst, dst, src[0], src[1]);
  break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 44681e4..36a383b 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -660,8 +660,6 @@ brw_instruction_name(enum opcode op)
case FS_OPCODE_DISCARD_JUMP:
   return discard_jump;
 
-   case FS_OPCODE_SET_OMASK:
-  return set_omask;
case FS_OPCODE_SET_SAMPLE_ID:
   return set_sample_id;
case FS_OPCODE_SET_SIMD4X2_OFFSET:
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] i965/fs: Hook up SIMD lowering to unroll FB writes of unsupported width.

2015-07-16 Thread Francisco Jerez
This shouldn't have any effect because we don't emit logical
framebuffer writes yet.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ae050b7..70fdc5e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3249,6 +3249,15 @@ get_lowered_simd_width(const struct brw_device_info 
*devinfo,
const fs_inst *inst)
 {
switch (inst-opcode) {
+   case FS_OPCODE_FB_WRITE_LOGICAL:
+  /* Gen6 doesn't support SIMD16 depth writes but we cannot handle them
+   * here.
+   */
+  assert(devinfo-gen != 6 || inst-src[3].file == BAD_FILE ||
+ inst-exec_size = 8);
+  /* Dual-source FB writes are unsupported in SIMD16 mode. */
+  return (inst-src[1].file != BAD_FILE ? 8 : inst-exec_size);
+
default:
   return inst-exec_size;
}
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/12] i965/fs: Don't attempt to copy the useless half of oMask for SIMD8 FB writes.

2015-07-16 Thread Francisco Jerez
There's no need to initialize the wrong half of oMask in the payload
when we're doing an 8-wide framebuffer write because it will be
ignored by the hardware anyway.  By doing it this way we can let the
SIMD lowering pass split the sample_mask source as a regular
per-channel source, otherwise we would have to introduce some sort of
per-instruction source query or use fs_inst::header_size for the
lowering pass to be able to find out whether some source is
header-like, and leave the source untouched in that case.

As a bonus this achieves the same purpose as the previous code without
making use of the SET_OMASK pseudo-instruction, which will be removed
in a future commit.
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b5a42b1..ba4b177 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1499,6 +1499,7 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this-prog_data;
brw_wm_prog_key *key = (brw_wm_prog_key*) this-key;
+   const fs_builder ubld = bld.group(exec_size, use_2nd_half);
int header_size = 2, payload_header_size;
 
/* We can potentially have a message length of up to 15, so we have to set
@@ -1536,14 +1537,24 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
}
 
if (prog_data-uses_omask) {
-  assert(this-sample_mask.file != BAD_FILE);
-  /* Hand over gl_SampleMask. Only lower 16 bits are relevant.  Since
-   * it's unsinged single words, one vgrf is always 16-wide.
-   */
   sources[length] = fs_reg(GRF, alloc.allocate(1),
-   BRW_REGISTER_TYPE_UW);
-  bld.exec_all().annotate(FB write oMask)
- .emit(FS_OPCODE_SET_OMASK, sources[length], this-sample_mask);
+   BRW_REGISTER_TYPE_UD);
+
+  /* Hand over gl_SampleMask.  Only the lower 16 bits of each channel are
+   * relevant.  Since it's unsigned single words one vgrf is always
+   * 16-wide, but only the lower or higher 8 channels will be used by the
+   * hardware when doing a SIMD8 write depending on whether we have
+   * selected the subspans for the first or second half respectively.
+   */
+  fs_reg sample_mask = this-sample_mask;
+  assert(sample_mask.file != BAD_FILE  type_sz(sample_mask.type) == 4);
+  sample_mask.type = BRW_REGISTER_TYPE_UW;
+  sample_mask.stride *= 2;
+
+  ubld.annotate(FB write oMask)
+  .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
+use_2nd_half),
+   half(sample_mask, use_2nd_half));
   length++;
}
 
@@ -1590,7 +1601,6 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
if (payload.dest_depth_reg)
   sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
 
-   const fs_builder ubld = bld.group(exec_size, use_2nd_half);
fs_inst *load;
fs_inst *write;
if (devinfo-gen = 7) {
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/12] i965/fs: Implement lowering of logical framebuffer writes.

2015-07-16 Thread Francisco Jerez
This does essentially the same thing as
fs_visitor::emit_single_fb_write(), with some slight differences:

 - We don't have to worry about exec_size and use_2nd_half anymore,
   16-wide sources have already been lowered to 8-wide thanks to the
   previous commit and the manual argument unzipping is no longer
   required.

 - The src/dst_depth and sample_mask values are now explicit sources
   of the instruction instead of being taken from the visitor state
   directly.  The same goes for the kill-pixel mask that will be
   passed to the instruction explicitly as predicate.

 - Everything is now done in static functions to improve
   encapsulation.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 137 ++-
 1 file changed, 136 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 70fdc5e..787849d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3188,6 +3188,25 @@ fs_visitor::lower_integer_multiplication()
 }
 
 static void
+setup_color_payload(const fs_builder bld, const brw_wm_prog_key *key,
+fs_reg *dst, fs_reg color, unsigned components)
+{
+   if (key-clamp_fragment_color) {
+  fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, 4);
+  assert(color.type == BRW_REGISTER_TYPE_F);
+
+  for (unsigned i = 0; i  components; i++)
+ set_saturate(true,
+  bld.MOV(offset(tmp, bld, i), offset(color, bld, i)));
+
+  color = tmp;
+   }
+
+   for (unsigned i = 0; i  components; i++)
+  dst[i] = offset(color, bld, i);
+}
+
+static void
 lower_fb_write_logical_send(const fs_builder bld, fs_inst *inst,
 const brw_wm_prog_data *prog_data,
 const brw_wm_prog_key *key,
@@ -3203,7 +3222,123 @@ lower_fb_write_logical_send(const fs_builder bld, 
fs_inst *inst,
fs_reg sample_mask = inst-src[5];
const unsigned components = inst-src[6].fixed_hw_reg.dw1.ud;
 
-   assert(!Not implemented);
+   /* We can potentially have a message length of up to 15, so we have to set
+* base_mrf to either 0 or 1 in order to fit in m0..m15.
+*/
+   fs_reg sources[15];
+   int header_size = 2, payload_header_size;
+   unsigned length = 0;
+
+   /* From the Sandy Bridge PRM, volume 4, page 198:
+*
+* Dispatched Pixel Enables. One bit per pixel indicating
+*  which pixels were originally enabled when the thread was
+*  dispatched. This field is only required for the end-of-
+*  thread message and on all dual-source messages.
+*/
+   if (devinfo-gen = 6 
+   (devinfo-is_haswell || devinfo-gen = 8 || !prog_data-uses_kill) 
+   color1.file == BAD_FILE 
+   key-nr_color_regions == 1) {
+  header_size = 0;
+   }
+
+   if (header_size != 0) {
+  assert(header_size == 2);
+  /* Allocate 2 registers for a header */
+  length += 2;
+   }
+
+   if (payload.aa_dest_stencil_reg) {
+  sources[length] = fs_reg(GRF, bld.shader-alloc.allocate(1));
+  bld.group(8, 0).exec_all().annotate(FB write stencil/AA alpha)
+ .MOV(sources[length],
+  fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
+  length++;
+   }
+
+   if (prog_data-uses_omask) {
+  sources[length] = fs_reg(GRF, bld.shader-alloc.allocate(1),
+   BRW_REGISTER_TYPE_UD);
+
+  /* Hand over gl_SampleMask.  Only the lower 16 bits of each channel are
+   * relevant.  Since it's unsigned single words one vgrf is always
+   * 16-wide, but only the lower or higher 8 channels will be used by the
+   * hardware when doing a SIMD8 write depending on whether we have
+   * selected the subspans for the first or second half respectively.
+   */
+  assert(sample_mask.file != BAD_FILE  type_sz(sample_mask.type) == 4);
+  sample_mask.type = BRW_REGISTER_TYPE_UW;
+  sample_mask.stride *= 2;
+
+  bld.exec_all().annotate(FB write oMask)
+ .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW),
+   inst-force_sechalf),
+  sample_mask);
+  length++;
+   }
+
+   payload_header_size = length;
+
+   if (src0_alpha.file != BAD_FILE) {
+  /* FIXME: This is being passed at the wrong location in the payload and
+   * doesn't work when gl_SampleMask and MRTs are used simultaneously.
+   * It's supposed to be immediately before oMask but there seems to be no
+   * reasonable way to pass them in the correct order because LOAD_PAYLOAD
+   * requires header sources to form a contiguous segment at the beginning
+   * of the message and src0_alpha has per-channel semantics.
+   */
+  setup_color_payload(bld, key, sources[length], src0_alpha, 1);
+  length++;
+   }
+
+   setup_color_payload(bld, key, sources[length], color0, components);
+   length += 4;
+
+   if (color1.file != BAD_FILE) {
+  setup_color_payload(bld, 

[Mesa-dev] [PATCH 05/12] i965/fs: Simplify control flow in emit_single_fb_write().

2015-07-16 Thread Francisco Jerez
Flatten the if ladder to match the way that the ordering of these
fields is specified in the hardware documentation a bit more closely.
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 28 
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6564d5f..08d9abf 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1551,19 +1551,23 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
 
payload_header_size = length;
 
-   if (color1.file == BAD_FILE) {
-  if (src0_alpha.file != BAD_FILE) {
- setup_color_payload(sources[length], src0_alpha, 1, exec_size, 
false);
- length++;
-  }
+   if (src0_alpha.file != BAD_FILE) {
+  /* FIXME: This is being passed at the wrong location in the payload and
+   * doesn't work when gl_SampleMask and MRTs are used simultaneously.
+   * It's supposed to be immediately before oMask but there seems to be no
+   * reasonable way to pass them in the correct order because LOAD_PAYLOAD
+   * requires header sources to form a contiguous segment at the beginning
+   * of the message and src0_alpha has per-channel semantics.
+   */
+  setup_color_payload(sources[length], src0_alpha, 1, exec_size, false);
+  length++;
+   }
 
-  setup_color_payload(sources[length], color0, components,
-  exec_size, use_2nd_half);
-  length += 4;
-   } else {
-  setup_color_payload(sources[length], color0, components,
-  exec_size, use_2nd_half);
-  length += 4;
+   setup_color_payload(sources[length], color0, components,
+   exec_size, use_2nd_half);
+   length += 4;
+
+   if (color1.file != BAD_FILE) {
   setup_color_payload(sources[length], color1, components,
   exec_size, use_2nd_half);
   length += 4;
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/12] i965/fs: Reimplement emit_single_fb_write() in terms of logical framebuffer writes.

2015-07-16 Thread Francisco Jerez
The only non-trivial thing it still has to do is figure out where to
take the src/dst depth values from and predicate the instruction if
discard is in use.  The manual SIMD unrolling logic in the dual-source
case goes away because this is now handled transparently by the SIMD
lowering pass.
---
 src/mesa/drivers/dri/i965/brw_fs.h   |   5 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 203 +++
 2 files changed, 20 insertions(+), 188 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 1ae79a9..64f89d4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -290,13 +290,10 @@ public:
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
  const fs_reg result);
 
-   void setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-unsigned exec_size, bool use_2nd_half);
void emit_alpha_test();
fs_inst *emit_single_fb_write(const brw::fs_builder bld,
  fs_reg color1, fs_reg color2,
- fs_reg src0_alpha, unsigned components,
- unsigned exec_size, bool use_2nd_half = 
false);
+ fs_reg src0_alpha, unsigned components);
void emit_fb_writes();
void emit_urb_writes();
void emit_cs_terminate();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index ba4b177..bcfeaa0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1409,33 +1409,6 @@ fs_visitor::emit_interpolation_setup_gen6()
}
 }
 
-void
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-unsigned exec_size, bool use_2nd_half)
-{
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this-key;
-   fs_inst *inst;
-
-   if (key-clamp_fragment_color) {
-  fs_reg tmp = vgrf(glsl_type::vec4_type);
-  assert(color.type == BRW_REGISTER_TYPE_F);
-  for (unsigned i = 0; i  components; i++) {
- inst = bld.MOV(offset(tmp, bld, i), offset(color, bld, i));
- inst-saturate = true;
-  }
-  color = tmp;
-   }
-
-   if (exec_size  dispatch_width) {
-  unsigned half_idx = use_2nd_half ? 1 : 0;
-  for (unsigned i = 0; i  components; i++)
- dst[i] = half(offset(color, bld, i), half_idx);
-   } else {
-  for (unsigned i = 0; i  components; i++)
- dst[i] = offset(color, bld, i);
-   }
-}
-
 static enum brw_conditional_mod
 cond_for_alpha_func(GLenum func)
 {
@@ -1493,146 +1466,34 @@ fs_visitor::emit_alpha_test()
 fs_inst *
 fs_visitor::emit_single_fb_write(const fs_builder bld,
  fs_reg color0, fs_reg color1,
- fs_reg src0_alpha, unsigned components,
- unsigned exec_size, bool use_2nd_half)
+ fs_reg src0_alpha, unsigned components)
 {
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this-prog_data;
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this-key;
-   const fs_builder ubld = bld.group(exec_size, use_2nd_half);
-   int header_size = 2, payload_header_size;
-
-   /* We can potentially have a message length of up to 15, so we have to set
-* base_mrf to either 0 or 1 in order to fit in m0..m15.
-*/
-   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, 15);
-   int length = 0;
-
-   /* From the Sandy Bridge PRM, volume 4, page 198:
-*
-* Dispatched Pixel Enables. One bit per pixel indicating
-*  which pixels were originally enabled when the thread was
-*  dispatched. This field is only required for the end-of-
-*  thread message and on all dual-source messages.
-*/
-   if (devinfo-gen = 6 
-   (devinfo-is_haswell || devinfo-gen = 8 || !prog_data-uses_kill) 
-   color1.file == BAD_FILE 
-   key-nr_color_regions == 1) {
-  header_size = 0;
-   }
-
-   if (header_size != 0) {
-  assert(header_size == 2);
-  /* Allocate 2 registers for a header */
-  length += 2;
-   }
-
-   if (payload.aa_dest_stencil_reg) {
-  sources[length] = fs_reg(GRF, alloc.allocate(1));
-  bld.group(8, 0).exec_all().annotate(FB write stencil/AA alpha)
- .MOV(sources[length],
-  fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg, 0)));
-  length++;
-   }
-
-   if (prog_data-uses_omask) {
-  sources[length] = fs_reg(GRF, alloc.allocate(1),
-   BRW_REGISTER_TYPE_UD);
-
-  /* Hand over gl_SampleMask.  Only the lower 16 bits of each channel are
-   * relevant.  Since it's unsigned single words one vgrf is always
-   * 16-wide, but only the lower or higher 8 channels will be used by the
-   * hardware when doing a SIMD8 

[Mesa-dev] [PATCH 07/12] i965/fs: Move up Gen6 no16 check to emit_fb_writes().

2015-07-16 Thread Francisco Jerez
And update the comment.
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index c489010..b5a42b1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1572,15 +1572,6 @@ fs_visitor::emit_single_fb_write(const fs_builder bld,
}
 
if (source_depth_to_render_target) {
-  if (devinfo-gen == 6) {
-/* For outputting oDepth on gen6, SIMD8 writes have to be
- * used.  This would require SIMD8 moves of each half to
- * message regs, kind of like pre-gen5 SIMD16 FB writes.
- * Just bail on doing so for now.
- */
-no16(Missing support for simd16 depth writes on gen6\n);
-  }
-
   if (prog-OutputsWritten  BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 /* Hand over gl_FragDepth. */
 assert(this-frag_depth.file != BAD_FILE);
@@ -1643,6 +1634,17 @@ fs_visitor::emit_fb_writes()
brw_wm_prog_key *key = (brw_wm_prog_key*) this-key;
 
fs_inst *inst = NULL;
+
+   if (source_depth_to_render_target  devinfo-gen == 6) {
+  /* For outputting oDepth on gen6, SIMD8 writes have to be used.  This
+   * would require SIMD8 moves of each half to message regs, e.g. by using
+   * the SIMD lowering pass.  Unfortunately this is more difficult than it
+   * sounds because the SIMD8 single-source message lacks channel selects
+   * for the second and third subspans.
+   */
+  no16(Missing support for simd16 depth writes on gen6\n);
+   }
+
if (do_dual_src) {
   const fs_builder abld = bld.annotate(FB dual-source write);
 
-- 
2.4.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/12] cso: remove clip state handling

2015-07-16 Thread Ilia Mirkin
Sssort of. But drivers that were expecting this to happen will now
need to do the compare themselves. For example nv50 (and I assume
nvc0) just always copy the new values in and set the dirty flag.
Really you're just pushing this down into the driver, which is
reasonable, but in the process you're regressing a bunch of drivers
(in terms of state updates).

On Thu, Jul 16, 2015 at 12:54 PM, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 There is no need for this.
 ---
  src/gallium/auxiliary/cso_cache/cso_context.c | 44 
 ---
  src/gallium/auxiliary/cso_cache/cso_context.h | 13 
  src/mesa/state_tracker/st_atom_clip.c |  2 +-
  3 files changed, 1 insertion(+), 58 deletions(-)

 diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
 b/src/gallium/auxiliary/cso_cache/cso_context.c
 index 1b4e2e6..3e7fe22 100644
 --- a/src/gallium/auxiliary/cso_cache/cso_context.c
 +++ b/src/gallium/auxiliary/cso_cache/cso_context.c
 @@ -111,9 +111,6 @@ struct cso_context {
 uint render_condition_mode, render_condition_mode_saved;
 boolean render_condition_cond, render_condition_cond_saved;

 -   struct pipe_clip_state clip;
 -   struct pipe_clip_state clip_saved;
 -
 struct pipe_framebuffer_state fb, fb_saved;
 struct pipe_viewport_state vp, vp_saved;
 struct pipe_blend_color blend_color;
 @@ -910,47 +907,6 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
 ctx-tesseval_shader_saved = NULL;
  }

 -/* clip state */
 -
 -static INLINE void
 -clip_state_cpy(struct pipe_clip_state *dst,
 -   const struct pipe_clip_state *src)
 -{
 -   memcpy(dst-ucp, src-ucp, sizeof(dst-ucp));
 -}
 -
 -static INLINE int
 -clip_state_cmp(const struct pipe_clip_state *a,
 -   const struct pipe_clip_state *b)
 -{
 -   return memcmp(a-ucp, b-ucp, sizeof(a-ucp));
 -}
 -
 -void
 -cso_set_clip(struct cso_context *ctx,
 - const struct pipe_clip_state *clip)
 -{
 -   if (clip_state_cmp(ctx-clip, clip)) {
 -  clip_state_cpy(ctx-clip, clip);
 -  ctx-pipe-set_clip_state(ctx-pipe, clip);
 -   }
 -}
 -
 -void
 -cso_save_clip(struct cso_context *ctx)
 -{
 -   clip_state_cpy(ctx-clip_saved, ctx-clip);
 -}
 -
 -void
 -cso_restore_clip(struct cso_context *ctx)
 -{
 -   if (clip_state_cmp(ctx-clip, ctx-clip_saved)) {
 -  clip_state_cpy(ctx-clip, ctx-clip_saved);
 -  ctx-pipe-set_clip_state(ctx-pipe, ctx-clip_saved);
 -   }
 -}
 -
  enum pipe_error
  cso_set_vertex_elements(struct cso_context *ctx,
  unsigned count,
 diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h 
 b/src/gallium/auxiliary/cso_cache/cso_context.h
 index c9a4226..3bee429 100644
 --- a/src/gallium/auxiliary/cso_cache/cso_context.h
 +++ b/src/gallium/auxiliary/cso_cache/cso_context.h
 @@ -188,19 +188,6 @@ void cso_save_render_condition(struct cso_context *cso);
  void cso_restore_render_condition(struct cso_context *cso);


 -/* clip state */
 -
 -void
 -cso_set_clip(struct cso_context *cso,
 - const struct pipe_clip_state *clip);
 -
 -void
 -cso_save_clip(struct cso_context *cso);
 -
 -void
 -cso_restore_clip(struct cso_context *cso);
 -
 -
  /* sampler view state */

  void
 diff --git a/src/mesa/state_tracker/st_atom_clip.c 
 b/src/mesa/state_tracker/st_atom_clip.c
 index f82c133..0d2a7e1 100644
 --- a/src/mesa/state_tracker/st_atom_clip.c
 +++ b/src/mesa/state_tracker/st_atom_clip.c
 @@ -60,7 +60,7 @@ static void update_clip( struct st_context *st )
use_eye ? ctx-Transform.EyeUserPlane
: ctx-Transform._ClipUserPlane, sizeof(clip.ucp));
 st-state.clip = clip;
 -   cso_set_clip(st-cso_context, clip);
 +   st-pipe-set_clip_state(st-pipe, clip);
  }


 --
 2.1.0

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/4] Last EGL 1.5 bits (v2)

2015-07-16 Thread Emil Velikov
Hi Marek,

On 12 July 2015 at 20:10, Marek Olšák mar...@gmail.com wrote:
 Hi,

 These are the last EGL 1.5 bits. I have fixed the Ubuntu Unity darkness issue 
 in patch 1 and dropped the ugly workaround from libGL.

 Please help review the patches.

Aside from a small issue in patch #2, the series looks good. It would be
better if others took a look as well, though.

Fwiw the series is
Reviewed-by: Emil Velikov emil.l.veli...@gmail.com

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/12] st/mesa: use cso_set_samplers

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/mesa/state_tracker/st_atom_sampler.c  | 9 -
 src/mesa/state_tracker/st_cb_drawpixels.c | 9 -
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_sampler.c 
b/src/mesa/state_tracker/st_atom_sampler.c
index b68eb16..4ab82bc 100644
--- a/src/mesa/state_tracker/st_atom_sampler.c
+++ b/src/mesa/state_tracker/st_atom_sampler.c
@@ -245,6 +245,7 @@ update_shader_samplers(struct st_context *st,
GLuint unit;
GLbitfield samplers_used;
const GLuint old_max = *num_samplers;
+   const struct pipe_sampler_state *states[PIPE_MAX_SAMPLERS];
 
samplers_used = prog-SamplersUsed;
 
@@ -261,13 +262,11 @@ update_shader_samplers(struct st_context *st,
  const GLuint texUnit = prog-SamplerUnits[unit];
 
  convert_sampler(st, sampler, texUnit);
-
+ states[unit] = sampler;
  *num_samplers = unit + 1;
-
- cso_single_sampler(st-cso_context, shader_stage, unit, sampler);
   }
   else if (samplers_used != 0 || unit  old_max) {
- cso_single_sampler(st-cso_context, shader_stage, unit, NULL);
+ states[unit] = NULL;
   }
   else {
  /* if we've reset all the old samplers and we have no more new ones */
@@ -275,7 +274,7 @@ update_shader_samplers(struct st_context *st,
   }
}
 
-   cso_single_sampler_done(st-cso_context, shader_stage);
+   cso_set_samplers(st-cso_context, shader_stage, *num_samplers, states);
 }
 
 
diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c 
b/src/mesa/state_tracker/st_cb_drawpixels.c
index f67f00d..b372697 100644
--- a/src/mesa/state_tracker/st_cb_drawpixels.c
+++ b/src/mesa/state_tracker/st_cb_drawpixels.c
@@ -757,6 +757,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint 
y, GLfloat z,
/* texture sampling state: */
{
   struct pipe_sampler_state sampler;
+  const struct pipe_sampler_state *states[2] = {sampler, sampler};
+
   memset(sampler, 0, sizeof(sampler));
   sampler.wrap_s = PIPE_TEX_WRAP_CLAMP;
   sampler.wrap_t = PIPE_TEX_WRAP_CLAMP;
@@ -766,11 +768,8 @@ draw_textured_quad(struct gl_context *ctx, GLint x, GLint 
y, GLfloat z,
   sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST;
   sampler.normalized_coords = normalized;
 
-  cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 0, sampler);
-  if (num_sampler_view  1) {
- cso_single_sampler(cso, PIPE_SHADER_FRAGMENT, 1, sampler);
-  }
-  cso_single_sampler_done(cso, PIPE_SHADER_FRAGMENT);
+  cso_set_samplers(cso, PIPE_SHADER_FRAGMENT,
+   num_sampler_view  1 ? 2 : 1, states);
}
 
/* viewport state: viewport matching window dims */
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/12] cso: remove clip state handling

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

There is no need for this.
---
 src/gallium/auxiliary/cso_cache/cso_context.c | 44 ---
 src/gallium/auxiliary/cso_cache/cso_context.h | 13 
 src/mesa/state_tracker/st_atom_clip.c |  2 +-
 3 files changed, 1 insertion(+), 58 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index 1b4e2e6..3e7fe22 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -111,9 +111,6 @@ struct cso_context {
uint render_condition_mode, render_condition_mode_saved;
boolean render_condition_cond, render_condition_cond_saved;
 
-   struct pipe_clip_state clip;
-   struct pipe_clip_state clip_saved;
-
struct pipe_framebuffer_state fb, fb_saved;
struct pipe_viewport_state vp, vp_saved;
struct pipe_blend_color blend_color;
@@ -910,47 +907,6 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
ctx-tesseval_shader_saved = NULL;
 }
 
-/* clip state */
-
-static INLINE void
-clip_state_cpy(struct pipe_clip_state *dst,
-   const struct pipe_clip_state *src)
-{
-   memcpy(dst-ucp, src-ucp, sizeof(dst-ucp));
-}
-
-static INLINE int
-clip_state_cmp(const struct pipe_clip_state *a,
-   const struct pipe_clip_state *b)
-{
-   return memcmp(a-ucp, b-ucp, sizeof(a-ucp));
-}
-
-void
-cso_set_clip(struct cso_context *ctx,
- const struct pipe_clip_state *clip)
-{
-   if (clip_state_cmp(ctx-clip, clip)) {
-  clip_state_cpy(ctx-clip, clip);
-  ctx-pipe-set_clip_state(ctx-pipe, clip);
-   }
-}
-
-void
-cso_save_clip(struct cso_context *ctx)
-{
-   clip_state_cpy(ctx-clip_saved, ctx-clip);
-}
-
-void
-cso_restore_clip(struct cso_context *ctx)
-{
-   if (clip_state_cmp(ctx-clip, ctx-clip_saved)) {
-  clip_state_cpy(ctx-clip, ctx-clip_saved);
-  ctx-pipe-set_clip_state(ctx-pipe, ctx-clip_saved);
-   }
-}
-
 enum pipe_error
 cso_set_vertex_elements(struct cso_context *ctx,
 unsigned count,
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h 
b/src/gallium/auxiliary/cso_cache/cso_context.h
index c9a4226..3bee429 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -188,19 +188,6 @@ void cso_save_render_condition(struct cso_context *cso);
 void cso_restore_render_condition(struct cso_context *cso);
 
 
-/* clip state */
-
-void
-cso_set_clip(struct cso_context *cso,
- const struct pipe_clip_state *clip);
-
-void
-cso_save_clip(struct cso_context *cso);
-
-void
-cso_restore_clip(struct cso_context *cso);
-
-
 /* sampler view state */
 
 void
diff --git a/src/mesa/state_tracker/st_atom_clip.c 
b/src/mesa/state_tracker/st_atom_clip.c
index f82c133..0d2a7e1 100644
--- a/src/mesa/state_tracker/st_atom_clip.c
+++ b/src/mesa/state_tracker/st_atom_clip.c
@@ -60,7 +60,7 @@ static void update_clip( struct st_context *st )
   use_eye ? ctx-Transform.EyeUserPlane
   : ctx-Transform._ClipUserPlane, sizeof(clip.ucp));
st-state.clip = clip;
-   cso_set_clip(st-cso_context, clip);
+   st-pipe-set_clip_state(st-pipe, clip);
 }
 
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/12] st/mesa: don't call st_validate_state in BlitFramebuffer

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

None of the draw states are used here.
This fixes a crash in piglit: ext_framebuffer_blit/blit-early

Calling st_manager_validate_framebuffers is the minimum requirement here.

Cc: mesa-sta...@lists.freedesktop.org
---
 src/mesa/state_tracker/st_cb_blit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/state_tracker/st_cb_blit.c 
b/src/mesa/state_tracker/st_cb_blit.c
index 6d93718..1396906 100644
--- a/src/mesa/state_tracker/st_cb_blit.c
+++ b/src/mesa/state_tracker/st_cb_blit.c
@@ -39,7 +39,7 @@
 #include st_cb_bitmap.h
 #include st_cb_blit.h
 #include st_cb_fbo.h
-#include st_atom.h
+#include st_manager.h
 
 #include util/u_format.h
 
@@ -92,7 +92,7 @@ st_BlitFramebuffer(struct gl_context *ctx,
} clip;
struct pipe_blit_info blit;
 
-   st_validate_state(st);
+   st_manager_validate_framebuffers(st);
 
/* Make sure bitmap rendering has landed in the framebuffers */
st_flush_bitmap_cache(st);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/12] tgsi: allow dumping to a file directly

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/auxiliary/tgsi/tgsi_dump.c| 19 +++
 src/gallium/auxiliary/tgsi/tgsi_dump.h|  5 +
 src/gallium/auxiliary/util/u_dump_state.c |  7 +++
 3 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c 
b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index c80d7a2..8ceb5b4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -48,6 +48,7 @@ struct dump_ctx
int indent;

uint indentation;
+   FILE *file;
 
void (*dump_printf)(struct dump_ctx *ctx, const char *format, ...);
 };
@@ -58,7 +59,10 @@ dump_ctx_printf(struct dump_ctx *ctx, const char *format, 
...)
va_list ap;
(void)ctx;
va_start(ap, format);
-   _debug_vprintf(format, ap);
+   if (ctx-file)
+  vfprintf(ctx-file, format, ap);
+   else
+  _debug_vprintf(format, ap);
va_end(ap);
 }
 
@@ -659,9 +663,7 @@ prolog(
 }
 
 void
-tgsi_dump(
-   const struct tgsi_token *tokens,
-   uint flags )
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
 {
struct dump_ctx ctx;
 
@@ -677,10 +679,17 @@ tgsi_dump(
ctx.indent = 0;
ctx.dump_printf = dump_ctx_printf;
ctx.indentation = 0;
+   ctx.file = file;
 
tgsi_iterate_shader( tokens, ctx.iter );
 }
 
+void
+tgsi_dump(const struct tgsi_token *tokens, uint flags)
+{
+   tgsi_dump_to_file(tokens, flags, NULL);
+}
+
 struct str_dump_ctx
 {
struct dump_ctx base;
@@ -733,6 +742,7 @@ tgsi_dump_str(
ctx.base.indent = 0;
ctx.base.dump_printf = str_dump_ctx_printf;
ctx.base.indentation = 0;
+   ctx.base.file = NULL;
 
ctx.str = str;
ctx.str[0] = 0;
@@ -756,6 +766,7 @@ tgsi_dump_instruction_str(
ctx.base.indent = 0;
ctx.base.dump_printf = str_dump_ctx_printf;
ctx.base.indentation = 0;
+   ctx.base.file = NULL;
 
ctx.str = str;
ctx.str[0] = 0;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h 
b/src/gallium/auxiliary/tgsi/tgsi_dump.h
index bc873a5..7c8f92e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -32,6 +32,8 @@
 #include pipe/p_defines.h
 #include pipe/p_shader_tokens.h
 
+#include stdio.h
+
 #if defined __cplusplus
 extern C {
 #endif
@@ -44,6 +46,9 @@ tgsi_dump_str(
size_t size);
 
 void
+tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file);
+
+void
 tgsi_dump(
const struct tgsi_token *tokens,
uint flags );
diff --git a/src/gallium/auxiliary/util/u_dump_state.c 
b/src/gallium/auxiliary/util/u_dump_state.c
index 88027cb..b4c92f0 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -426,7 +426,6 @@ util_dump_clip_state(FILE *stream, const struct 
pipe_clip_state *state)
 void
 util_dump_shader_state(FILE *stream, const struct pipe_shader_state *state)
 {
-   char str[8192];
unsigned i;
 
if(!state) {
@@ -434,12 +433,12 @@ util_dump_shader_state(FILE *stream, const struct 
pipe_shader_state *state)
   return;
}
 
-   tgsi_dump_str(state-tokens, 0, str, sizeof(str));
-
util_dump_struct_begin(stream, pipe_shader_state);
 
util_dump_member_begin(stream, tokens);
-   util_dump_string(stream, str);
+   fprintf(stream, \\n);
+   tgsi_dump_to_file(state-tokens, 0, stream);
+   fprintf(stream, \);
util_dump_member_end(stream);
 
util_dump_member_begin(stream, stream_output);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/12] cso: only allow saving and restoring fragment sampler views

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Not needed for other shader stages.
---
 src/gallium/auxiliary/cso_cache/cso_context.c | 103 +-
 src/gallium/auxiliary/cso_cache/cso_context.h |   4 +-
 src/gallium/auxiliary/hud/hud_context.c   |   4 +-
 src/gallium/auxiliary/postprocess/pp_run.c|   4 +-
 src/gallium/auxiliary/util/u_blit.c   |   4 +-
 src/mesa/state_tracker/st_cb_bitmap.c |   4 +-
 src/mesa/state_tracker/st_cb_drawpixels.c |   4 +-
 7 files changed, 63 insertions(+), 64 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index 744b00c..b3ff1a3 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -66,12 +66,6 @@ struct sampler_info
 
void *samplers_saved[PIPE_MAX_SAMPLERS];
unsigned nr_samplers_saved;
-
-   struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   unsigned nr_views;
-
-   struct pipe_sampler_view *views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
-   unsigned nr_views_saved;
 };
 
 
@@ -85,6 +79,12 @@ struct cso_context {
boolean has_tessellation;
boolean has_streamout;
 
+   struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_fragment_views;
+
+   struct pipe_sampler_view 
*fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+   unsigned nr_fragment_views_saved;
+
struct sampler_info samplers[PIPE_SHADER_TYPES];
 
struct pipe_vertex_buffer aux_vertex_buffer_current;
@@ -297,7 +297,7 @@ out:
  */
 void cso_destroy_context( struct cso_context *ctx )
 {
-   unsigned i, shader;
+   unsigned i;
 
if (ctx-pipe) {
   ctx-pipe-set_index_buffer(ctx-pipe, NULL);
@@ -347,13 +347,9 @@ void cso_destroy_context( struct cso_context *ctx )
  ctx-pipe-set_stream_output_targets(ctx-pipe, 0, NULL, NULL);
}
 
-   /* free sampler views for each shader stage */
-   for (shader = 0; shader  Elements(ctx-samplers); shader++) {
-  struct sampler_info *info = ctx-samplers[shader];
-  for (i = 0; i  PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
- pipe_sampler_view_reference(info-views[i], NULL);
- pipe_sampler_view_reference(info-views_saved[i], NULL);
-  }
+   for (i = 0; i  PIPE_MAX_SHADER_SAMPLER_VIEWS; i++) {
+  pipe_sampler_view_reference(ctx-fragment_views[i], NULL);
+  pipe_sampler_view_reference(ctx-fragment_views_saved[i], NULL);
}
 
util_unreference_framebuffer_state(ctx-fb);
@@ -1281,71 +1277,74 @@ cso_set_sampler_views(struct cso_context *ctx,
   unsigned count,
   struct pipe_sampler_view **views)
 {
-   struct sampler_info *info = ctx-samplers[shader_stage];
-   unsigned i;
-   boolean any_change = FALSE;
+   if (shader_stage == PIPE_SHADER_FRAGMENT) {
+  unsigned i;
+  boolean any_change = FALSE;
 
-   /* reference new views */
-   for (i = 0; i  count; i++) {
-  any_change |= info-views[i] != views[i];
-  pipe_sampler_view_reference(info-views[i], views[i]);
-   }
-   /* unref extra old views, if any */
-   for (; i  info-nr_views; i++) {
-  any_change |= info-views[i] != NULL;
-  pipe_sampler_view_reference(info-views[i], NULL);
-   }
+  /* reference new views */
+  for (i = 0; i  count; i++) {
+ any_change |= ctx-fragment_views[i] != views[i];
+ pipe_sampler_view_reference(ctx-fragment_views[i], views[i]);
+  }
+  /* unref extra old views, if any */
+  for (; i  ctx-nr_fragment_views; i++) {
+ any_change |= ctx-fragment_views[i] != NULL;
+ pipe_sampler_view_reference(ctx-fragment_views[i], NULL);
+  }
 
-   /* bind the new sampler views */
-   if (any_change) {
-  ctx-pipe-set_sampler_views(ctx-pipe, shader_stage, 0,
-   MAX2(info-nr_views, count),
-   info-views);
-   }
+  /* bind the new sampler views */
+  if (any_change) {
+ ctx-pipe-set_sampler_views(ctx-pipe, shader_stage, 0,
+  MAX2(ctx-nr_fragment_views, count),
+  ctx-fragment_views);
+  }
 
-   info-nr_views = count;
+  ctx-nr_fragment_views = count;
+   }
+   else
+  ctx-pipe-set_sampler_views(ctx-pipe, shader_stage, 0, count, views);
 }
 
 
 void
-cso_save_sampler_views(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_sampler_views(struct cso_context *ctx)
 {
-   struct sampler_info *info = ctx-samplers[shader_stage];
unsigned i;
 
-   info-nr_views_saved = info-nr_views;
+   ctx-nr_fragment_views_saved = ctx-nr_fragment_views;
 
-   for (i = 0; i  info-nr_views; i++) {
-  assert(!info-views_saved[i]);
-  pipe_sampler_view_reference(info-views_saved[i], info-views[i]);
+   for (i = 0; i  ctx-nr_fragment_views; i++) {
+  assert(!ctx-fragment_views_saved[i]);
+  

[Mesa-dev] [PATCH 07/12] cso: only allow saving and restoring fragment sampler states

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/auxiliary/cso_cache/cso_context.c | 28 +++
 src/gallium/auxiliary/cso_cache/cso_context.h |  4 ++--
 src/gallium/auxiliary/hud/hud_context.c   |  4 ++--
 src/gallium/auxiliary/postprocess/pp_run.c|  4 ++--
 src/gallium/auxiliary/util/u_blit.c   |  4 ++--
 src/mesa/state_tracker/st_cb_bitmap.c |  4 ++--
 src/mesa/state_tracker/st_cb_drawpixels.c |  4 ++--
 7 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index 9aa53db..1b4e2e6 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -58,9 +58,6 @@ struct sampler_info
 {
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
-
-   void *samplers_saved[PIPE_MAX_SAMPLERS];
-   unsigned nr_samplers_saved;
 };
 
 
@@ -80,6 +77,9 @@ struct cso_context {
struct pipe_sampler_view 
*fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
unsigned nr_fragment_views_saved;
 
+   void *fragment_samplers_saved[PIPE_MAX_SAMPLERS];
+   unsigned nr_fragment_samplers_saved;
+
struct sampler_info samplers[PIPE_SHADER_TYPES];
 
struct pipe_vertex_buffer aux_vertex_buffer_current;
@@ -1229,21 +1229,25 @@ cso_set_samplers(struct cso_context *ctx,
 }
 
 void
-cso_save_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_save_fragment_samplers(struct cso_context *ctx)
 {
-   struct sampler_info *info = ctx-samplers[shader_stage];
-   info-nr_samplers_saved = info-nr_samplers;
-   memcpy(info-samplers_saved, info-samplers, sizeof(info-samplers));
+   struct sampler_info *info = ctx-samplers[PIPE_SHADER_FRAGMENT];
+
+   ctx-nr_fragment_samplers_saved = info-nr_samplers;
+   memcpy(ctx-fragment_samplers_saved, info-samplers,
+  sizeof(info-samplers));
 }
 
 
 void
-cso_restore_samplers(struct cso_context *ctx, unsigned shader_stage)
+cso_restore_fragment_samplers(struct cso_context *ctx)
 {
-   struct sampler_info *info = ctx-samplers[shader_stage];
-   info-nr_samplers = info-nr_samplers_saved;
-   memcpy(info-samplers, info-samplers_saved, sizeof(info-samplers));
-   single_sampler_done(ctx, shader_stage);
+   struct sampler_info *info = ctx-samplers[PIPE_SHADER_FRAGMENT];
+
+   info-nr_samplers = ctx-nr_fragment_samplers_saved;
+   memcpy(info-samplers, ctx-fragment_samplers_saved,
+  sizeof(info-samplers));
+   single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
 }
 
 
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h 
b/src/gallium/auxiliary/cso_cache/cso_context.h
index 9d12aaf..c9a4226 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -72,10 +72,10 @@ cso_set_samplers(struct cso_context *cso,
  const struct pipe_sampler_state **states);
 
 void
-cso_save_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_save_fragment_samplers(struct cso_context *cso);
 
 void
-cso_restore_samplers(struct cso_context *cso, unsigned shader_stage);
+cso_restore_fragment_samplers(struct cso_context *cso);
 
 /* Alternate interface to support state trackers that like to modify
  * samplers one at a time:
diff --git a/src/gallium/auxiliary/hud/hud_context.c 
b/src/gallium/auxiliary/hud/hud_context.c
index 4602b7c..4631cd3 100644
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -438,7 +438,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
cso_save_depth_stencil_alpha(cso);
cso_save_fragment_shader(cso);
cso_save_fragment_sampler_views(cso);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(cso);
cso_save_rasterizer(cso);
cso_save_viewport(cso);
cso_save_stream_outputs(cso);
@@ -568,7 +568,7 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
cso_restore_depth_stencil_alpha(cso);
cso_restore_fragment_shader(cso);
cso_restore_fragment_sampler_views(cso);
-   cso_restore_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_restore_fragment_samplers(cso);
cso_restore_rasterizer(cso);
cso_restore_viewport(cso);
cso_restore_stream_outputs(cso);
diff --git a/src/gallium/auxiliary/postprocess/pp_run.c 
b/src/gallium/auxiliary/postprocess/pp_run.c
index 04f92c9..caa2062 100644
--- a/src/gallium/auxiliary/postprocess/pp_run.c
+++ b/src/gallium/auxiliary/postprocess/pp_run.c
@@ -125,7 +125,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
cso_save_rasterizer(cso);
cso_save_sample_mask(cso);
cso_save_min_samples(cso);
-   cso_save_samplers(cso, PIPE_SHADER_FRAGMENT);
+   cso_save_fragment_samplers(cso);
cso_save_fragment_sampler_views(cso);
cso_save_stencil_ref(cso);
cso_save_stream_outputs(cso);
@@ -196,7 +196,7 @@ pp_run(struct pp_queue_t *ppq, struct pipe_resource *in,
cso_restore_rasterizer(cso);

[Mesa-dev] [PATCH 12/12] cso: eliminate some sampler function wrappers

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/auxiliary/cso_cache/cso_context.c | 39 +++
 src/gallium/auxiliary/cso_cache/cso_context.h |  6 ++---
 2 files changed, 12 insertions(+), 33 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index 3e7fe22..d3fd6aa 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -1069,11 +1069,9 @@ unsigned cso_get_aux_vertex_buffer_slot(struct 
cso_context *ctx)
 
 /* fragment/vertex sampler view state */
 
-static enum pipe_error
-single_sampler(struct cso_context *ctx,
-   struct sampler_info *info,
-   unsigned idx,
-   const struct pipe_sampler_state *templ)
+enum pipe_error
+cso_single_sampler(struct cso_context *ctx, unsigned shader_stage,
+   unsigned idx, const struct pipe_sampler_state *templ)
 {
void *handle = NULL;
 
@@ -1109,24 +1107,13 @@ single_sampler(struct cso_context *ctx,
   }
}
 
-   info-samplers[idx] = handle;
-
+   ctx-samplers[shader_stage].samplers[idx] = handle;
return PIPE_OK;
 }
 
-enum pipe_error
-cso_single_sampler(struct cso_context *ctx,
-   unsigned shader_stage,
-   unsigned idx,
-   const struct pipe_sampler_state *templ)
-{
-   return single_sampler(ctx, ctx-samplers[shader_stage], idx, templ);
-}
-
-
 
-static void
-single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
+void
+cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
 {
struct sampler_info *info = ctx-samplers[shader_stage];
unsigned i;
@@ -1142,12 +1129,6 @@ single_sampler_done(struct cso_context *ctx, unsigned 
shader_stage)
   info-samplers);
 }
 
-void
-cso_single_sampler_done(struct cso_context *ctx, unsigned shader_stage)
-{
-   single_sampler_done(ctx, shader_stage);
-}
-
 
 /*
  * If the function encounters any errors it will return the
@@ -1168,18 +1149,18 @@ cso_set_samplers(struct cso_context *ctx,
 */
 
for (i = 0; i  nr; i++) {
-  temp = single_sampler(ctx, info, i, templates[i]);
+  temp = cso_single_sampler(ctx, shader_stage, i, templates[i]);
   if (temp != PIPE_OK)
  error = temp;
}
 
for ( ; i  info-nr_samplers; i++) {
-  temp = single_sampler(ctx, info, i, NULL);
+  temp = cso_single_sampler(ctx, shader_stage, i, NULL);
   if (temp != PIPE_OK)
  error = temp;
}
 
-   single_sampler_done(ctx, shader_stage);
+   cso_single_sampler_done(ctx, shader_stage);
 
return error;
 }
@@ -1203,7 +1184,7 @@ cso_restore_fragment_samplers(struct cso_context *ctx)
info-nr_samplers = ctx-nr_fragment_samplers_saved;
memcpy(info-samplers, ctx-fragment_samplers_saved,
   sizeof(info-samplers));
-   single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
+   cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
 }
 
 
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h 
b/src/gallium/auxiliary/cso_cache/cso_context.h
index 3bee429..f0a2739 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -81,10 +81,8 @@ cso_restore_fragment_samplers(struct cso_context *cso);
  * samplers one at a time:
  */
 enum pipe_error
-cso_single_sampler(struct cso_context *cso,
-   unsigned shader_stage,
-   unsigned count,
-   const struct pipe_sampler_state *states);
+cso_single_sampler(struct cso_context *cso, unsigned shader_stage,
+   unsigned idx, const struct pipe_sampler_state *states);
 
 void
 cso_single_sampler_done(struct cso_context *cso, unsigned shader_stage);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/12] gallium/util: improve dump functions

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/auxiliary/util/u_dump.h |  20 ++-
 src/gallium/auxiliary/util/u_dump_defines.c |  41 +
 src/gallium/auxiliary/util/u_dump_state.c   | 233 ++--
 3 files changed, 241 insertions(+), 53 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_dump.h 
b/src/gallium/auxiliary/util/u_dump.h
index 3ddf518..1c0e879 100644
--- a/src/gallium/auxiliary/util/u_dump.h
+++ b/src/gallium/auxiliary/util/u_dump.h
@@ -88,14 +88,16 @@ util_dump_tex_filter(unsigned value, boolean shortened);
 const char *
 util_dump_query_type(unsigned value, boolean shortened);
 
+const char *
+util_dump_mode(unsigned value, boolean shortened);
+
 
 /*
  * p_state.h, through a FILE
  */
 
 void
-util_dump_template(FILE *stream,
-   const struct pipe_resource *templat);
+util_dump_resource(FILE *stream, const struct pipe_resource *state);
 
 void
 util_dump_rasterizer_state(FILE *stream,
@@ -157,10 +159,20 @@ void
 util_dump_image_view(FILE *stream, const struct pipe_image_view *state);
 
 void
+util_dump_sampler_view(FILE *stream, const struct pipe_sampler_view *state);
+
+void
 util_dump_transfer(FILE *stream,
const struct pipe_transfer *state);
 
 void
+util_dump_constant_buffer(FILE *stream,
+  const struct pipe_constant_buffer *state);
+
+void
+util_dump_index_buffer(FILE *stream, const struct pipe_index_buffer *state);
+
+void
 util_dump_vertex_buffer(FILE *stream,
 const struct pipe_vertex_buffer *state);
 
@@ -169,6 +181,10 @@ util_dump_vertex_element(FILE *stream,
  const struct pipe_vertex_element *state);
 
 void
+util_dump_stream_output_target(FILE *stream,
+   const struct pipe_stream_output_target *state);
+
+void
 util_dump_draw_info(FILE *stream, const struct pipe_draw_info *state);
 
 void
diff --git a/src/gallium/auxiliary/util/u_dump_defines.c 
b/src/gallium/auxiliary/util/u_dump_defines.c
index 03fd15d..9021887 100644
--- a/src/gallium/auxiliary/util/u_dump_defines.c
+++ b/src/gallium/auxiliary/util/u_dump_defines.c
@@ -392,3 +392,44 @@ util_dump_query_type_short_names[] = {
 };
 
 DEFINE_UTIL_DUMP_CONTINUOUS(query_type)
+
+
+static const char *
+util_dump_mode_names[] = {
+   PIPE_PRIM_POINTS,
+   PIPE_PRIM_LINES,
+   PIPE_PRIM_LINE_LOOP,
+   PIPE_PRIM_LINE_STRIP,
+   PIPE_PRIM_TRIANGLES,
+   PIPE_PRIM_TRIANGLE_STRIP,
+   PIPE_PRIM_TRIANGLE_FAN,
+   PIPE_PRIM_QUADS,
+   PIPE_PRIM_QUAD_STRIP,
+   PIPE_PRIM_POLYGON,
+   PIPE_PRIM_LINES_ADJACENCY,
+   PIPE_PRIM_LINE_STRIP_ADJACENCY,
+   PIPE_PRIM_TRIANGLES_ADJACENCY,
+   PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,
+   PIPE_PRIM_PATCHES,
+};
+
+static const char *
+util_dump_mode_short_names[] = {
+   points,
+   lines,
+   line_loop,
+   line_strip,
+   triangles,
+   triangle_strip,
+   triangle_fan,
+   quads,
+   quad_strip,
+   polygon,
+   lines_adjacency,
+   line_strip_adjacency,
+   triangles_adjacency,
+   triangle_strip_adjacency,
+   patches,
+};
+
+DEFINE_UTIL_DUMP_CONTINUOUS(mode)
diff --git a/src/gallium/auxiliary/util/u_dump_state.c 
b/src/gallium/auxiliary/util/u_dump_state.c
index b4c92f0..58f8cbe 100644
--- a/src/gallium/auxiliary/util/u_dump_state.c
+++ b/src/gallium/auxiliary/util/u_dump_state.c
@@ -247,6 +247,42 @@ util_dump_enum_func(FILE *stream, unsigned value)
util_dump_enum(stream, util_dump_func(value, TRUE));
 }
 
+static void
+util_dump_enum_mode(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_mode(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_target(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_target(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_filter(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_filter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_mipfilter(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_mipfilter(value, TRUE));
+}
+
+static void
+util_dump_enum_tex_wrap(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_tex_wrap(value, TRUE));
+}
+
+static void
+util_dump_enum_stencil_op(FILE *stream, unsigned value)
+{
+   util_dump_enum(stream, util_dump_stencil_op(value, TRUE));
+}
+
 
 /*
  * Public functions
@@ -254,38 +290,28 @@ util_dump_enum_func(FILE *stream, unsigned value)
 
 
 void
-util_dump_template(FILE *stream, const struct pipe_resource *templat)
+util_dump_resource(FILE *stream, const struct pipe_resource *state)
 {
-   if(!templat) {
+   if(!state) {
   util_dump_null(stream);
   return;
}
 
util_dump_struct_begin(stream, pipe_resource);
 
-   util_dump_member(stream, int, templat, target);
-   util_dump_member(stream, format, templat, format);
-
-   util_dump_member_begin(stream, width);
-   util_dump_uint(stream, templat-width0);
-   util_dump_member_end(stream);
-
-   util_dump_member_begin(stream, height);
-   util_dump_uint(stream, 

[Mesa-dev] [PATCH 06/12] cso: drop inefficient checking for redundant sampler state changes

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Drivers can do this better, because they can skip redundant state changes
at per-slot granularity.
---
 src/gallium/auxiliary/cso_cache/cso_context.c | 28 ++-
 1 file changed, 2 insertions(+), 26 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index b3ff1a3..9aa53db 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -56,11 +56,6 @@
  */
 struct sampler_info
 {
-   struct {
-  void *samplers[PIPE_MAX_SAMPLERS];
-  unsigned nr_samplers;
-   } hw;
-
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
 
@@ -1187,27 +1182,8 @@ single_sampler_done(struct cso_context *ctx, unsigned 
shader_stage)
}
 
info-nr_samplers = i;
-
-   if (info-hw.nr_samplers != info-nr_samplers ||
-   memcmp(info-hw.samplers,
-  info-samplers,
-  info-nr_samplers * sizeof(void *)) != 0)
-   {
-  memcpy(info-hw.samplers,
- info-samplers,
- info-nr_samplers * sizeof(void *));
-
-  /* set remaining slots/pointers to null */
-  for (i = info-nr_samplers; i  info-hw.nr_samplers; i++)
- info-samplers[i] = NULL;
-
-  ctx-pipe-bind_sampler_states(ctx-pipe, shader_stage, 0,
- MAX2(info-nr_samplers,
-  info-hw.nr_samplers),
- info-samplers);
-
-  info-hw.nr_samplers = info-nr_samplers;
-   }
+   ctx-pipe-bind_sampler_states(ctx-pipe, shader_stage, 0, i,
+  info-samplers);
 }
 
 void
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/12] st/dri: fix EGL_KHR_fence_sync since the last radeonsi change broke it

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Broken by f1be3d8cdde17a9b9ae283e1bab2f46b992d3bf3, which returns NULL
if no commands have been submitted.
---
 src/gallium/include/pipe/p_context.h  |  5 -
 src/gallium/state_trackers/dri/dri2.c | 14 ++
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index f89dae9..1439333 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -385,7 +385,10 @@ struct pipe_context {
 * Flush draw commands
 *
 * NOTE: use screen-fence_reference() (or equivalent) to transfer
-* new fence ref to **fence, to ensure that previous fence is unref'd
+* new fence ref to **fence, to ensure that previous fence is unref'd.
+*
+* If the returned fence is NULL, it means that all commands have already
+* been finished, or no commands have been submitted yet.
 *
 * \param fence  if not NULL, an old fence to unref and transfer a
 *new fence reference to
diff --git a/src/gallium/state_trackers/dri/dri2.c 
b/src/gallium/state_trackers/dri/dri2.c
index 91b4431..45359ff 100644
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1307,12 +1307,6 @@ dri2_create_fence(__DRIcontext *_ctx)
   return NULL;
 
ctx-flush(ctx, fence-pipe_fence, 0);
-
-   if (!fence-pipe_fence) {
-  FREE(fence);
-  return NULL;
-   }
-
return fence;
 }
 
@@ -1350,8 +1344,6 @@ dri2_destroy_fence(__DRIscreen *_screen, void *_fence)
   screen-fence_reference(screen, fence-pipe_fence, NULL);
else if (fence-cl_event)
   driscreen-opencl_dri_event_release(fence-cl_event);
-   else
-  assert(0);
 
FREE(fence);
 }
@@ -1377,10 +1369,8 @@ dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, 
unsigned flags,
   else
  return driscreen-opencl_dri_event_wait(fence-cl_event, timeout);
}
-   else {
-  assert(0);
-  return false;
-   }
+   else
+  return true;
 }
 
 static void
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/12] gallium/tests: use cso_set_samplers

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/tests/trivial/quad-tex.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/tests/trivial/quad-tex.c 
b/src/gallium/tests/trivial/quad-tex.c
index daae577..c019c7bb 100644
--- a/src/gallium/tests/trivial/quad-tex.c
+++ b/src/gallium/tests/trivial/quad-tex.c
@@ -297,6 +297,8 @@ static void close_prog(struct program *p)
 
 static void draw(struct program *p)
 {
+   const struct pipe_sampler_state *samplers[] = {p-sampler};
+
/* set the render target */
cso_set_framebuffer(p-cso, p-framebuffer);
 
@@ -310,8 +312,7 @@ static void draw(struct program *p)
cso_set_viewport(p-cso, p-viewport);
 
/* sampler */
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 0, p-sampler);
-   cso_single_sampler_done(p-cso, PIPE_SHADER_FRAGMENT);
+   cso_set_samplers(p-cso, PIPE_SHADER_FRAGMENT, 1, samplers);
 
/* texture sampler view */
cso_set_sampler_views(p-cso, PIPE_SHADER_FRAGMENT, 1, p-view);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] gallium/util: use cso_set_samplers

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/auxiliary/postprocess/pp_colors.c |  4 ++--
 src/gallium/auxiliary/postprocess/pp_mlaa.c   | 23 ++-
 src/gallium/auxiliary/util/u_blit.c   |  6 --
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/src/gallium/auxiliary/postprocess/pp_colors.c 
b/src/gallium/auxiliary/postprocess/pp_colors.c
index 247e4df..e6ea010 100644
--- a/src/gallium/auxiliary/postprocess/pp_colors.c
+++ b/src/gallium/auxiliary/postprocess/pp_colors.c
@@ -37,6 +37,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
 {
 
struct pp_program *p = ppq-p;
+   const struct pipe_sampler_state *samplers[] = {p-sampler_point};
 
pp_filter_setup_in(p, in);
pp_filter_setup_out(p, out);
@@ -44,8 +45,7 @@ pp_nocolor(struct pp_queue_t *ppq, struct pipe_resource *in,
pp_filter_set_fb(p);
pp_filter_misc_state(p);
 
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 0, p-sampler_point);
-   cso_single_sampler_done(p-cso, PIPE_SHADER_FRAGMENT);
+   cso_set_samplers(p-cso, PIPE_SHADER_FRAGMENT, 1, samplers);
cso_set_sampler_views(p-cso, PIPE_SHADER_FRAGMENT, 1, p-view);
 
cso_set_vertex_shader_handle(p-cso, ppq-shaders[n][0]);
diff --git a/src/gallium/auxiliary/postprocess/pp_mlaa.c 
b/src/gallium/auxiliary/postprocess/pp_mlaa.c
index 147d14d..024a248 100644
--- a/src/gallium/auxiliary/postprocess/pp_mlaa.c
+++ b/src/gallium/auxiliary/postprocess/pp_mlaa.c
@@ -141,8 +141,10 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct 
pipe_resource *in,
p-pipe-clear(p-pipe, PIPE_CLEAR_STENCIL | PIPE_CLEAR_COLOR0,
   p-clear_color, 0, 0);
 
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 0, p-sampler_point);
-   cso_single_sampler_done(p-cso, PIPE_SHADER_FRAGMENT);
+   {
+  const struct pipe_sampler_state *samplers[] = {p-sampler_point};
+  cso_set_samplers(p-cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
cso_set_sampler_views(p-cso, PIPE_SHADER_FRAGMENT, 1, p-view);
 
cso_set_vertex_shader_handle(p-cso, ppq-shaders[n][1]);/* offsetvs */
@@ -168,10 +170,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct 
pipe_resource *in,
 
pp_filter_set_clear_fb(p);
 
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 0, p-sampler_point);
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 1, p-sampler_point);
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 2, p-sampler);
-   cso_single_sampler_done(p-cso, PIPE_SHADER_FRAGMENT);
+   {
+  const struct pipe_sampler_state *samplers[] =
+ {p-sampler_point, p-sampler_point, p-sampler};
+  cso_set_samplers(p-cso, PIPE_SHADER_FRAGMENT, 3, samplers);
+   }
 
arr[0] = p-view;
cso_set_sampler_views(p-cso, PIPE_SHADER_FRAGMENT, 3, arr);
@@ -199,9 +202,11 @@ pp_jimenezmlaa_run(struct pp_queue_t *ppq, struct 
pipe_resource *in,
u_sampler_view_default_template(v_tmp, in, in-format);
arr[0] = p-pipe-create_sampler_view(p-pipe, in, v_tmp);
 
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 0, p-sampler_point);
-   cso_single_sampler(p-cso, PIPE_SHADER_FRAGMENT, 1, p-sampler_point);
-   cso_single_sampler_done(p-cso, PIPE_SHADER_FRAGMENT);
+   {
+  const struct pipe_sampler_state *samplers[] =
+ {p-sampler_point, p-sampler_point};
+  cso_set_samplers(p-cso, PIPE_SHADER_FRAGMENT, 2, samplers);
+   }
 
arr[1] = p-view;
cso_set_sampler_views(p-cso, PIPE_SHADER_FRAGMENT, 2, arr);
diff --git a/src/gallium/auxiliary/util/u_blit.c 
b/src/gallium/auxiliary/util/u_blit.c
index df128c8..edb1f34 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -572,8 +572,10 @@ util_blit_pixels_tex(struct blit_state *ctx,
ctx-sampler.normalized_coords = normalized;
ctx-sampler.min_img_filter = filter;
ctx-sampler.mag_img_filter = filter;
-   cso_single_sampler(ctx-cso, PIPE_SHADER_FRAGMENT, 0, ctx-sampler);
-   cso_single_sampler_done(ctx-cso, PIPE_SHADER_FRAGMENT);
+   {
+  const struct pipe_sampler_state *samplers[] = {ctx-sampler};
+  cso_set_samplers(ctx-cso, PIPE_SHADER_FRAGMENT, 1, samplers);
+   }
 
/* viewport */
ctx-viewport.scale[0] = 0.5f * dst-width;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/6] radeonsi: remove switch statement in si_create_context

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

and make si_init_config static
---
 src/gallium/drivers/radeonsi/si_pipe.c  | 13 ++---
 src/gallium/drivers/radeonsi/si_state.c |  6 +-
 src/gallium/drivers/radeonsi/si_state.h |  1 -
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index 2b6a6ff..0878b88 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -128,17 +128,8 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen, void *
sctx-atoms.s.streamout_begin = sctx-b.streamout.begin_atom;
sctx-atoms.s.streamout_enable = sctx-b.streamout.enable_atom;
 
-   switch (sctx-b.chip_class) {
-   case SI:
-   case CIK:
-   si_init_state_functions(sctx);
-   si_init_shader_functions(sctx);
-   si_init_config(sctx);
-   break;
-   default:
-   R600_ERR(Unsupported chip class %d.\n, sctx-b.chip_class);
-   goto fail;
-   }
+   si_init_state_functions(sctx);
+   si_init_shader_functions(sctx);
 
if (sscreen-b.debug_flags  DBG_FORCE_DMA)
sctx-b.b.resource_copy_region = sctx-b.dma_copy;
diff --git a/src/gallium/drivers/radeonsi/si_state.c 
b/src/gallium/drivers/radeonsi/si_state.c
index 1d4a4e8..316c689 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2920,6 +2920,8 @@ static void si_need_gfx_cs_space(struct pipe_context 
*ctx, unsigned num_dw,
si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
 }
 
+static void si_init_config(struct si_context *sctx);
+
 void si_init_state_functions(struct si_context *sctx)
 {
si_init_atom(sctx-framebuffer.atom, sctx-atoms.s.framebuffer, 
si_emit_framebuffer_state, 0);
@@ -2981,6 +2983,8 @@ void si_init_state_functions(struct si_context *sctx)
} else {
sctx-b.dma_copy = si_dma_copy;
}
+
+   si_init_config(sctx);
 }
 
 static void
@@ -3087,7 +3091,7 @@ si_write_harvested_raster_configs(struct si_context *sctx,
   INSTANCE_BROADCAST_WRITES);
 }
 
-void si_init_config(struct si_context *sctx)
+static void si_init_config(struct si_context *sctx)
 {
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h 
b/src/gallium/drivers/radeonsi/si_state.h
index 634f947..0c1fdb4 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -256,7 +256,6 @@ boolean si_is_format_supported(struct pipe_screen *screen,
unsigned sample_count,
unsigned usage);
 void si_init_state_functions(struct si_context *sctx);
-void si_init_config(struct si_context *sctx);
 unsigned cik_bank_wh(unsigned bankwh);
 unsigned cik_db_pipe_config(struct si_screen *sscreen, unsigned tile_mode);
 unsigned cik_macro_tile_aspect(unsigned macro_tile_aspect);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/6] radeonsi: don't change pipe_resource in resource_copy_region

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Copied from r600g. pipe_resource can be shared by multiple threads, so we
shouldn't change it.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |   1 -
 src/gallium/drivers/radeonsi/si_blit.c| 201 +++---
 src/gallium/drivers/radeonsi/si_state.c   |  62 ++--
 src/gallium/drivers/radeonsi/si_state.h   |   6 +
 4 files changed, 110 insertions(+), 160 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index a471426..aeb1e7a 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -214,7 +214,6 @@ struct r600_texture {
float   depth_clear_value;
 
boolnon_disp_tiling; /* R600-Cayman only */
-   unsignedmipmap_shift;
 };
 
 struct r600_surface {
diff --git a/src/gallium/drivers/radeonsi/si_blit.c 
b/src/gallium/drivers/radeonsi/si_blit.c
index 6c7b383..f6db3f5 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -455,89 +455,6 @@ struct texture_orig_info {
unsigned npix0_y;
 };
 
-static void si_compressed_to_blittable(struct pipe_resource *tex,
-  unsigned level,
-  struct texture_orig_info *orig)
-{
-   struct r600_texture *rtex = (struct r600_texture*)tex;
-   unsigned pixsize = util_format_get_blocksize(rtex-resource.b.b.format);
-   int new_format;
-   int new_height, new_width;
-
-   orig-format = tex-format;
-   orig-width0 = tex-width0;
-   orig-height0 = tex-height0;
-   orig-npix0_x = rtex-surface.level[0].npix_x;
-   orig-npix0_y = rtex-surface.level[0].npix_y;
-   orig-npix_x = rtex-surface.level[level].npix_x;
-   orig-npix_y = rtex-surface.level[level].npix_y;
-
-   if (pixsize == 8)
-   new_format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
-   else
-   new_format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
-
-   new_width = util_format_get_nblocksx(tex-format, orig-width0);
-   new_height = util_format_get_nblocksy(tex-format, orig-height0);
-
-   tex-width0 = new_width;
-   tex-height0 = new_height;
-   tex-format = new_format;
-   rtex-surface.level[0].npix_x = util_format_get_nblocksx(orig-format, 
orig-npix0_x);
-   rtex-surface.level[0].npix_y = util_format_get_nblocksy(orig-format, 
orig-npix0_y);
-   rtex-surface.level[level].npix_x = 
util_format_get_nblocksx(orig-format, orig-npix_x);
-   rtex-surface.level[level].npix_y = 
util_format_get_nblocksy(orig-format, orig-npix_y);
-
-   /* By dividing the dimensions by 4, we effectively decrement
-* last_level by 2, therefore the last 2 mipmap levels disappear and
-* aren't blittable. Note that the last 3 mipmap levels (4x4, 2x2,
-* 1x1) have equal slice sizes, which is an important assumption
-* for this to work.
-*
-* In order to make the last 2 mipmap levels blittable, we have to
-* add the slice size of the last mipmap level to the texture
-* address, so that even though the hw thinks it reads last_level-2,
-* it will actually read last_level-1, and if we add the slice size*2,
-* it will read last_level. That's how this workaround works.
-*/
-   if (level  rtex-resource.b.b.last_level-2)
-   rtex-mipmap_shift = level - (rtex-resource.b.b.last_level-2);
-}
-
-static void si_change_format(struct pipe_resource *tex,
-unsigned level,
-struct texture_orig_info *orig,
-enum pipe_format format)
-{
-   struct r600_texture *rtex = (struct r600_texture*)tex;
-
-   orig-format = tex-format;
-   orig-width0 = tex-width0;
-   orig-height0 = tex-height0;
-   orig-npix0_x = rtex-surface.level[0].npix_x;
-   orig-npix0_y = rtex-surface.level[0].npix_y;
-   orig-npix_x = rtex-surface.level[level].npix_x;
-   orig-npix_y = rtex-surface.level[level].npix_y;
-
-   tex-format = format;
-}
-
-static void si_reset_blittable_to_orig(struct pipe_resource *tex,
-  unsigned level,
-  struct texture_orig_info *orig)
-{
-   struct r600_texture *rtex = (struct r600_texture*)tex;
-
-   tex-format = orig-format;
-   tex-width0 = orig-width0;
-   tex-height0 = orig-height0;
-   rtex-surface.level[0].npix_x = orig-npix0_x;
-   rtex-surface.level[0].npix_y = orig-npix0_y;
-   rtex-surface.level[level].npix_x = orig-npix_x;
-   rtex-surface.level[level].npix_y = orig-npix_y;
-   rtex-mipmap_shift = 0;
-}
-
 void si_resource_copy_region(struct pipe_context *ctx,
 

[Mesa-dev] [PATCH 1/6] radeonsi: remove redundant parameter in si_shader_binary_read

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

---
 src/gallium/drivers/radeonsi/si_compute.c | 2 +-
 src/gallium/drivers/radeonsi/si_shader.c  | 8 +++-
 src/gallium/drivers/radeonsi/si_shader.h  | 3 +--
 3 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 89bef2e..0361c99 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -144,7 +144,7 @@ static void *si_create_compute_state(
 * the shader code to the GPU.
 */
init_scratch_buffer(sctx, program);
-   si_shader_binary_read(sctx-screen, program-shader, 
program-shader.binary);
+   si_shader_binary_read(sctx-screen, program-shader);
 
 #endif
program-input_buffer = si_resource_create_custom(sctx-b.b.screen,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 75a29ae..b988f6d 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2686,11 +2686,9 @@ void si_shader_apply_scratch_relocs(struct si_context 
*sctx,
}
 }
 
-int si_shader_binary_read(struct si_screen *sscreen,
-   struct si_shader *shader,
-   const struct radeon_shader_binary *binary)
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
-
+   const struct radeon_shader_binary *binary = shader-binary;
unsigned i;
unsigned code_size;
unsigned char *ptr;
@@ -2750,7 +2748,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
if (r) {
return r;
}
-   r = si_shader_binary_read(sscreen, shader, shader-binary);
+   r = si_shader_binary_read(sscreen, shader);
 
FREE(shader-binary.config);
FREE(shader-binary.rodata);
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 8d309b4..1e8b52b 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -191,8 +191,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod);
 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
-int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
-   const struct radeon_shader_binary *binary);
+int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
uint64_t scratch_va);
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/6] radeonsi: upload shader rodata after updating scratch relocations

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

Cc: 10.5 10.6 mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/radeonsi/si_shader.c| 49 ++---
 src/gallium/drivers/radeonsi/si_shader.h|  1 +
 src/gallium/drivers/radeonsi/si_state_shaders.c |  8 +---
 3 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index b988f6d..955e780 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2686,16 +2686,41 @@ void si_shader_apply_scratch_relocs(struct si_context 
*sctx,
}
 }
 
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader)
+{
+   const struct radeon_shader_binary *binary = shader-binary;
+   unsigned code_size = binary-code_size + binary-rodata_size;
+   unsigned char *ptr;
+
+   r600_resource_reference(shader-bo, NULL);
+   shader-bo = si_resource_create_custom(sscreen-b.b,
+  PIPE_USAGE_IMMUTABLE,
+  code_size);
+   if (!shader-bo)
+   return -ENOMEM;
+
+   ptr = sscreen-b.ws-buffer_map(shader-bo-cs_buf, NULL,
+   PIPE_TRANSFER_READ_WRITE);
+   util_memcpy_cpu_to_le32(ptr, binary-code, binary-code_size);
+   if (binary-rodata_size  0) {
+   ptr += binary-code_size;
+   util_memcpy_cpu_to_le32(ptr, binary-rodata,
+   binary-rodata_size);
+   }
+
+   sscreen-b.ws-buffer_unmap(shader-bo-cs_buf);
+   return 0;
+}
+
 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
 {
const struct radeon_shader_binary *binary = shader-binary;
unsigned i;
-   unsigned code_size;
-   unsigned char *ptr;
bool dump  = r600_can_dump_shader(sscreen-b,
shader-selector ? shader-selector-tokens : NULL);
 
si_shader_binary_read_config(sscreen, shader, 0);
+   si_shader_binary_upload(sscreen, shader);
 
if (dump) {
if (!binary-disassembled) {
@@ -2713,26 +2738,6 @@ int si_shader_binary_read(struct si_screen *sscreen, 
struct si_shader *shader)
shader-num_sgprs, shader-num_vgprs, binary-code_size,
shader-lds_size, shader-scratch_bytes_per_wave);
}
-
-   /* copy new shader */
-   code_size = binary-code_size + binary-rodata_size;
-   r600_resource_reference(shader-bo, NULL);
-   shader-bo = si_resource_create_custom(sscreen-b.b, 
PIPE_USAGE_IMMUTABLE,
-  code_size);
-   if (shader-bo == NULL) {
-   return -ENOMEM;
-   }
-
-
-   ptr = sscreen-b.ws-buffer_map(shader-bo-cs_buf, NULL, 
PIPE_TRANSFER_READ_WRITE);
-   util_memcpy_cpu_to_le32(ptr, binary-code, binary-code_size);
-   if (binary-rodata_size  0) {
-   ptr += binary-code_size;
-   util_memcpy_cpu_to_le32(ptr, binary-rodata, 
binary-rodata_size);
-   }
-
-   sscreen-b.ws-buffer_unmap(shader-bo-cs_buf);
-
return 0;
 }
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 1e8b52b..c12782f 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -191,6 +191,7 @@ int si_compile_llvm(struct si_screen *sscreen, struct 
si_shader *shader,
LLVMTargetMachineRef tm, LLVMModuleRef mod);
 void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader 
*shader);
 int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_apply_scratch_relocs(struct si_context *sctx,
struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a842d9d..12bce9a 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -749,7 +749,6 @@ static unsigned si_update_scratch_buffer(struct si_context 
*sctx,
 {
struct si_shader *shader;
uint64_t scratch_va = sctx-scratch_buffer-gpu_address;
-   unsigned char *ptr;
 
if (!sel)
return 0;
@@ -770,12 +769,7 @@ static unsigned si_update_scratch_buffer(struct si_context 
*sctx,
si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
 
/* Replace the shader bo with a new bo that has the relocs applied. */
-   r600_resource_reference(shader-bo, NULL);
-   shader-bo = si_resource_create_custom(sctx-screen-b.b, 
PIPE_USAGE_IMMUTABLE,
-  

[Mesa-dev] [PATCH 5/6] gallium/radeon: remove buffer_unmap calls that can potentially decrease perf

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

buffer_unmap is currently a no-op on radeon and done correctly on amdgpu.
I plan to fix it for radeon, but before that, all occurrences of buffer_unmap
that can negatively affect performance in the future must be removed.

There are 2 reasons for removing buffer_unmap calls:
- There is a likelihood that buffer_map will be called again, so we don't
  want to unmap yet.
- The buffer is being released, which automatically unmaps it.
---
 src/gallium/drivers/r300/r300_query.c | 2 --
 src/gallium/drivers/r300/r300_transfer.c  | 4 
 src/gallium/drivers/r600/r600_state_common.c  | 1 -
 src/gallium/drivers/radeon/r600_pipe_common.c | 4 +---
 src/gallium/drivers/radeon/r600_query.c   | 5 -
 src/gallium/drivers/radeon/r600_texture.c | 9 -
 src/gallium/drivers/radeonsi/si_compute.c | 2 --
 7 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_query.c 
b/src/gallium/drivers/r300/r300_query.c
index 01b83b8..6a4cd71 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -168,8 +168,6 @@ static boolean r300_get_query_result(struct pipe_context* 
pipe,
 map++;
 }
 
-r300-rws-buffer_unmap(q-cs_buf);
-
 if (q-type == PIPE_QUERY_OCCLUSION_PREDICATE) {
 vresult-b = temp != 0;
 } else {
diff --git a/src/gallium/drivers/r300/r300_transfer.c 
b/src/gallium/drivers/r300/r300_transfer.c
index b87164b..9505ae1 100644
--- a/src/gallium/drivers/r300/r300_transfer.c
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -251,16 +251,12 @@ void r300_texture_transfer_unmap(struct pipe_context *ctx,
 struct r300_resource *tex = r300_resource(transfer-resource);
 
 if (trans-linear_texture) {
-rws-buffer_unmap(trans-linear_texture-cs_buf);
-
 if (transfer-usage  PIPE_TRANSFER_WRITE) {
 r300_copy_into_tiled_texture(ctx, trans);
 }
 
 pipe_resource_reference(
 (struct pipe_resource**)trans-linear_texture, NULL);
-} else {
-rws-buffer_unmap(tex-cs_buf);
 }
 FREE(transfer);
 }
diff --git a/src/gallium/drivers/r600/r600_state_common.c 
b/src/gallium/drivers/r600/r600_state_common.c
index 13dc9ee..2097941 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1409,7 +1409,6 @@ static void r600_draw_vbo(struct pipe_context *ctx, const 
struct pipe_draw_info
data += info.indirect_offset / 
sizeof(unsigned);
start = data[2] * ib.index_size;
count = data[0];
-   
rctx-b.ws-buffer_unmap(indirect_resource-cs_buf);
}
else {
start = 0;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index bcbf0b9..94a7535 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -939,10 +939,8 @@ void r600_destroy_common_screen(struct r600_common_screen 
*rscreen)
pipe_mutex_destroy(rscreen-aux_context_lock);
rscreen-aux_context-destroy(rscreen-aux_context);
 
-   if (rscreen-trace_bo) {
-   rscreen-ws-buffer_unmap(rscreen-trace_bo-cs_buf);
+   if (rscreen-trace_bo)
pipe_resource_reference((struct 
pipe_resource**)rscreen-trace_bo, NULL);
-   }
 
rscreen-ws-destroy(rscreen-ws);
FREE(rscreen);
diff --git a/src/gallium/drivers/radeon/r600_query.c 
b/src/gallium/drivers/radeon/r600_query.c
index 71f4a15..a1d8241 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -118,7 +118,6 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
}
results += 4 * ctx-max_db;
}
-   ctx-ws-buffer_unmap(buf-cs_buf);
break;
case PIPE_QUERY_TIME_ELAPSED:
case PIPE_QUERY_TIMESTAMP:
@@ -130,7 +129,6 @@ static struct r600_resource *r600_new_query_buffer(struct 
r600_common_context *c
case PIPE_QUERY_PIPELINE_STATISTICS:
results = r600_buffer_map_sync_with_rings(ctx, buf, 
PIPE_TRANSFER_WRITE);
memset(results, 0, buf_size);
-   ctx-ws-buffer_unmap(buf-cs_buf);
break;
default:
assert(0);
@@ -751,7 +749,6 @@ static boolean r600_get_query_buffer_result(struct 
r600_common_context *ctx,
assert(0);
}
 
-   ctx-ws-buffer_unmap(qbuf-buf-cs_buf);
return TRUE;
 }
 
@@ -919,7 +916,6 @@ void r600_query_init_backend_mask(struct 
r600_common_context *ctx)
results = r600_buffer_map_sync_with_rings(ctx, buffer, 

[Mesa-dev] [PATCH 6/6] winsys/radeon: implement buffer_unmap

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

This has been a no-op due to performance concerns. From now on, drivers
should decide when they don't want to unmap, not the winsys.
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 29 +--
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  1 +
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 314d0ef..232e2f1 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -351,14 +351,11 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
 if (bo-user_ptr)
 return bo-user_ptr;
 
-/* Return the pointer if it's already mapped. */
-if (bo-ptr)
-return bo-ptr;
-
 /* Map the buffer. */
 pipe_mutex_lock(bo-map_mutex);
-/* Return the pointer if it's already mapped (in case of a race). */
+/* Return the pointer if it's already mapped. */
 if (bo-ptr) {
+bo-map_count++;
 pipe_mutex_unlock(bo-map_mutex);
 return bo-ptr;
 }
@@ -383,6 +380,7 @@ void *radeon_bo_do_map(struct radeon_bo *bo)
 return NULL;
 }
 bo-ptr = ptr;
+bo-map_count = 1;
 pipe_mutex_unlock(bo-map_mutex);
 
 return bo-ptr;
@@ -467,7 +465,26 @@ static void *radeon_bo_map(struct radeon_winsys_cs_handle 
*buf,
 
 static void radeon_bo_unmap(struct radeon_winsys_cs_handle *_buf)
 {
-/* NOP */
+struct radeon_bo *bo = (struct radeon_bo*)_buf;
+
+if (bo-user_ptr)
+return;
+
+pipe_mutex_lock(bo-map_mutex);
+if (!bo-ptr) {
+pipe_mutex_unlock(bo-map_mutex);
+return; /* it's not been mapped */
+}
+
+assert(bo-map_count);
+if (--bo-map_count) {
+pipe_mutex_unlock(bo-map_mutex);
+return; /* it's been mapped multiple times */
+}
+
+os_munmap(bo-ptr, bo-base.size);
+bo-ptr = NULL;
+pipe_mutex_unlock(bo-map_mutex);
 }
 
 static void radeon_bo_get_base_buffer(struct pb_buffer *buf,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
index b83ce16..bad08d8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h
@@ -54,6 +54,7 @@ struct radeon_bo {
 
 void *ptr;
 pipe_mutex map_mutex;
+unsigned map_count;
 
 uint32_t handle;
 uint32_t flink_name;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] i965: Push miptree tiling request into flags

2015-07-16 Thread Chad Versace
On Tue 14 Jul 2015, Ben Widawsky wrote:
 With the last few patches a way was provided to influence lower layer miptree
 layout and allocation decisions via flags (replacing bools). For simplicity, I
 chose not to touch the tiling requests because the change was slightly less
 mechanical than replacing the bools.
 
 The goal is to organize the code so we can continue to add new parameters and
 tiling types while minimizing risk to the existing code, and not having to
 constantly add new function parameters.
 
 v2: Rebased on Anuj's recent Yf/Ys changes
 Fix non-msrt MCS allocation (was only happening in gen8 case before)
 
 Cc: Anuj Phogat anuj.pho...@gmail.com
 Cc: Chad Versace chad.vers...@intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net

I have one nitpick...

 -   /* 'requested' parameter of intel_miptree_create_layout() is relevant
 -* only for non bo miptree. Tiling for bo is already computed above.
 -* So, the tiling requested (INTEL_MIPTREE_TILING_ANY) below is
 -* just a place holder and will not make any change to the miptree
 -* tiling format.
 +   /* The BO already has a tiling format and we shouldn't confuse the lower
 +* layers by making it try to find a tiling format again.
  */
 +   assert((layout_flags 
 +  (MIPTREE_LAYOUT_ALLOC_ANY_TILED | MIPTREE_LAYOUT_ALLOC_LINEAR)) == 
 0);
 layout_flags |= MIPTREE_LAYOUT_FOR_BO;

I think the assert would be more readable if split as below. Also, the
split version eliminates uncertainty when interpreting the assertion
failure: it will tell exactly which offending flag is present.

assert((layout_flags & MIPTREE_LAYOUT_ALLOC_ANY_TILED) == 0);
assert((layout_flags & MIPTREE_LAYOUT_ALLOC_LINEAR) == 0);

With or without that change, patch 1 is
Reviewed-by: Chad Versace chad.vers...@intel.com
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/12] cso: remove clip state handling

2015-07-16 Thread Marek Olšák
On Thu, Jul 16, 2015 at 8:05 PM, Marek Olšák mar...@gmail.com wrote:
 From: Marek Olšák marek.ol...@amd.com

 There is no need for this.

 v2: handle redundant clip state changes in st/mesa
 ---
  src/gallium/auxiliary/cso_cache/cso_context.c | 44 
 ---
  src/gallium/auxiliary/cso_cache/cso_context.h | 13 
  src/mesa/state_tracker/st_atom_clip.c |  7 +++--
  3 files changed, 5 insertions(+), 59 deletions(-)

 diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
 b/src/gallium/auxiliary/cso_cache/cso_context.c
 index 1b4e2e6..3e7fe22 100644
 --- a/src/gallium/auxiliary/cso_cache/cso_context.c
 +++ b/src/gallium/auxiliary/cso_cache/cso_context.c
 @@ -111,9 +111,6 @@ struct cso_context {
 uint render_condition_mode, render_condition_mode_saved;
 boolean render_condition_cond, render_condition_cond_saved;

 -   struct pipe_clip_state clip;
 -   struct pipe_clip_state clip_saved;
 -
 struct pipe_framebuffer_state fb, fb_saved;
 struct pipe_viewport_state vp, vp_saved;
 struct pipe_blend_color blend_color;
 @@ -910,47 +907,6 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
 ctx-tesseval_shader_saved = NULL;
  }

 -/* clip state */
 -
 -static INLINE void
 -clip_state_cpy(struct pipe_clip_state *dst,
 -   const struct pipe_clip_state *src)
 -{
 -   memcpy(dst-ucp, src-ucp, sizeof(dst-ucp));
 -}
 -
 -static INLINE int
 -clip_state_cmp(const struct pipe_clip_state *a,
 -   const struct pipe_clip_state *b)
 -{
 -   return memcmp(a-ucp, b-ucp, sizeof(a-ucp));
 -}
 -
 -void
 -cso_set_clip(struct cso_context *ctx,
 - const struct pipe_clip_state *clip)
 -{
 -   if (clip_state_cmp(ctx-clip, clip)) {
 -  clip_state_cpy(ctx-clip, clip);
 -  ctx-pipe-set_clip_state(ctx-pipe, clip);
 -   }
 -}
 -
 -void
 -cso_save_clip(struct cso_context *ctx)
 -{
 -   clip_state_cpy(ctx-clip_saved, ctx-clip);
 -}
 -
 -void
 -cso_restore_clip(struct cso_context *ctx)
 -{
 -   if (clip_state_cmp(ctx-clip, ctx-clip_saved)) {
 -  clip_state_cpy(ctx-clip, ctx-clip_saved);
 -  ctx-pipe-set_clip_state(ctx-pipe, ctx-clip_saved);
 -   }
 -}
 -
  enum pipe_error
  cso_set_vertex_elements(struct cso_context *ctx,
  unsigned count,
 diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h 
 b/src/gallium/auxiliary/cso_cache/cso_context.h
 index c9a4226..3bee429 100644
 --- a/src/gallium/auxiliary/cso_cache/cso_context.h
 +++ b/src/gallium/auxiliary/cso_cache/cso_context.h
 @@ -188,19 +188,6 @@ void cso_save_render_condition(struct cso_context *cso);
  void cso_restore_render_condition(struct cso_context *cso);


 -/* clip state */
 -
 -void
 -cso_set_clip(struct cso_context *cso,
 - const struct pipe_clip_state *clip);
 -
 -void
 -cso_save_clip(struct cso_context *cso);
 -
 -void
 -cso_restore_clip(struct cso_context *cso);
 -
 -
  /* sampler view state */

  void
 diff --git a/src/mesa/state_tracker/st_atom_clip.c 
 b/src/mesa/state_tracker/st_atom_clip.c
 index f82c133..eb43fe5 100644
 --- a/src/mesa/state_tracker/st_atom_clip.c
 +++ b/src/mesa/state_tracker/st_atom_clip.c
 @@ -59,8 +59,11 @@ static void update_clip( struct st_context *st )
 memcpy(clip.ucp,
use_eye ? ctx-Transform.EyeUserPlane
: ctx-Transform._ClipUserPlane, sizeof(clip.ucp));
 -   st-state.clip = clip;
 -   cso_set_clip(st-cso_context, clip);
 +
 +   if (st-state.clip != clip) {

I've also changed this to memcmp.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] i965/miptree: Rewrite the miptree map logic

2015-07-16 Thread Chad Versace
On Tue 14 Jul 2015, Ben Widawsky wrote:
 This patch rewrites the logic for determining which method we are using for 
 mapping
 a miptree. It is my intention that this patch and the required patches 
 before
 it do not change functionality, or if they do, only in very obscure and
 unobservable cases.
 
 I have two reasons why I decided to write this patch. The existing logic was 
 way
 too tricky. In particular, the way in which it evaluated which operation to 
 use
 was out of order - specifically when it checked to use the blitter in
 use_intel_mipree_map_blit(), part of the check is to determine if it will 
 later
 be unable to use the GTT. The other reason is to make playing with the various
 operations much easier. For example, there are some theories being thrown 
 around
 that we might actually want to use the blitter where we use the GTT today, and
 vice versa. After this patch, benchmarking those changes is much more
 straightforward.
 
 It's pretty difficult for me to prove there is no real change going on. I ran 
 a
 subset of my benchmarks on this though. The following benchmarks show no perf
 difference on BDW with ministat with n=5 and CI=.95:
 OglBatch7
 OglDeferred
 OglFillPixel
 OglGeomPoint
 OglGeomTriList
 OglHdrBloom
 OglPSBump2
 OglPSPhong
 OglPSPom
 OglShMapPcf
 OglTerrainFlyInst
 OglTexMem512
 OglVSDiffuse8
 OglVSInstancing
 OglZBuffer
 plot3d
 trex
 
 It's important to point out that many of the changes affect non-LLC platforms,
 and I do not yet have data for that. I'll be collecting it over the next few
 days, but I figure this patch can get some comments meanwhile.
 
 Signed-off-by: Ben Widawsky b...@bwidawsk.net
 ---
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 76 
 +--
  1 file changed, 37 insertions(+), 39 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
 b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 index 2788270..545fbf3 100644
 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
 @@ -2283,6 +2283,8 @@ intel_miptree_unmap_movntdqa(struct brw_context *brw,
 map-buffer = NULL;
 map-ptr = NULL;
  }
 +#else
 +#define intel_miptree_map_movntdqa(x,y,z,w,a) abort()
  #endif
  
  static void
 @@ -2621,36 +2623,6 @@ can_blit_slice(struct brw_context *brw,
 return true;
  }
  
 -static bool
 -use_intel_mipree_map_blit(struct brw_context *brw,
 -  struct intel_mipmap_tree *mt,
 -  GLbitfield mode,
 -  unsigned int level,
 -  unsigned int slice)
 -{
 -   if (brw-has_llc 
 -   !(mode  GL_MAP_WRITE_BIT) 
 -   can_blit_slice(brw, mt, level, slice))
 -  return true;
 -
 -   if (mt-tiling != I915_TILING_NONE 
 -   mt-bo-size = brw-max_gtt_map_object_size) {
 -  /* XXX: This assertion is actually the final condition for platforms
 -   * without SSE4.1.  Returning false is not the right thing to do with
 -   * the current code. On those platforms, the goal of this function is 
 to give
 -   * preference to the GTT, and at this point we've determined we cannot 
 use
 -   * the GTT, and we cannot blit, so we are out of options.
 -   *
 -   * NOTE: It should be possible to actually handle the case, but AFAIK, 
 we
 -   * never get this assertion.
 -   */
 -  assert(can_blit_slice(brw, mt, level, slice));
 -  return true;
 -   }
 -
 -   return false;
 -}
 -
  /**
   * Parameter \a out_stride has type ptrdiff_t not because the buffer stride 
 may
   * exceed 32 bits but to diminish the likelihood subtle bugs in pointer
 @@ -2706,18 +2678,44 @@ intel_miptree_map(struct brw_context *brw,
goto done;
 }
  
 -   if (use_intel_mipree_map_blit(brw, mt, mode, level, slice)) {
 -  intel_miptree_map_blit(brw, mt, map, level, slice);
 +   /* First determine what the available option are, then pick from the best
 +* option based on the platform.
 +*/
 +   bool can_hw_blit = can_blit_slice(brw, mt, level, slice);

can_hw_blit is too weak of a condition.

"can" is very different from "should". Before, this function chose to
call intel_miptree_map_blit() if use_intel_mipree_map_blit() recommended
it (because "use" really means "should use" in that function name). The
set of conditions that satisfies "can" is much larger.

For example, can_blit_slice() should return true for linear buffers
(they are blittable, after all). However, intel_miptree_map() should
mmap those buffers instead of blitting them.

 +   bool can_use_gtt = mt-bo-size  brw-max_gtt_map_object_size;
  #if defined(USE_SSE41)
 -   } else if (!(mode  GL_MAP_WRITE_BIT) 
 -  !mt-compressed  cpu_has_sse4_1 
 -  (mt-pitch % 16 == 0)) {
 -  intel_miptree_map_movntdqa(brw, mt, map, level, slice);
 +   bool can_stream_map = cpu_has_sse4_1  mt-pitch % 16 == 0;
 +#else
 +   bool can_stream_map = false;
  #endif
 -   } else {
 -  

[Mesa-dev] [PATCH] radeonsi: don't return NULL fence if no fence is available

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

An alternative (and ugly) solution to the current clover issue.
---
 src/gallium/drivers/radeon/r600_pipe_common.c | 12 
 src/gallium/drivers/radeon/r600_pipe_common.h |  3 +++
 src/gallium/drivers/radeonsi/si_hw_context.c  | 10 --
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c 
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 94a7535..858404d 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -718,6 +718,15 @@ static void r600_fence_reference(struct pipe_screen 
*screen,
 {
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)-ws;
 
+   if (*ptr == R600_SIGNALLED_FENCE)
+   *ptr = NULL;
+
+   if (fence == R600_SIGNALLED_FENCE) {
+   rws-fence_reference(ptr, NULL);
+   *ptr = R600_SIGNALLED_FENCE;
+   return;
+   }
+
rws-fence_reference(ptr, fence);
 }
 
@@ -727,6 +736,9 @@ static boolean r600_fence_finish(struct pipe_screen *screen,
 {
struct radeon_winsys *rws = ((struct r600_common_screen*)screen)-ws;
 
+   if (fence == R600_SIGNALLED_FENCE)
+   return true;
+
return rws-fence_wait(rws, fence, timeout);
 }
 
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index aeb1e7a..ab8cabb 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -43,6 +43,9 @@
 #include util/u_suballoc.h
 #include util/u_transfer.h
 
+/* A dummy fence handle used as a special fence that's always signalled. */
+#define R600_SIGNALLED_FENCE ((struct pipe_fence_handle*)1)
+
 #define R600_RESOURCE_FLAG_TRANSFER(PIPE_RESOURCE_FLAG_DRV_PRIV  
0)
 #define R600_RESOURCE_FLAG_FLUSHED_DEPTH   (PIPE_RESOURCE_FLAG_DRV_PRIV  
1)
 #define R600_RESOURCE_FLAG_FORCE_TILING
(PIPE_RESOURCE_FLAG_DRV_PRIV  2)
diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c 
b/src/gallium/drivers/radeonsi/si_hw_context.c
index c75def5..29576f3 100644
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -98,8 +98,14 @@ void si_context_gfx_flush(void *context, unsigned flags,
struct radeon_winsys *ws = ctx-b.ws;
 
if (cs-cdw == ctx-b.initial_gfx_cs_size) {
-   if (fence)
-   ws-fence_reference(fence, ctx-last_gfx_fence);
+   if (fence) {
+   if (ctx-last_gfx_fence)
+   ws-fence_reference(fence, ctx-last_gfx_fence);
+   else {
+   ws-fence_reference(fence, NULL);
+   *fence = R600_SIGNALLED_FENCE;
+   }
+   }
if (!(flags  RADEON_FLUSH_ASYNC))
ws-cs_sync_flush(cs);
return;
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: fix up some texture error checks

2015-07-16 Thread sroland
From: Roland Scheidegger srol...@vmware.com

In particular, we were incorrectly accepting s3tc (and lots of others)
for CompressedTexSubImage3D (but not CompressedTexImage3D) calls with 3d
targets. At this time, the only allowed formats for these calls are the
bptc ones, since none of the specific extensions allow it (astc hdr would).
Also, fix up a bug in _mesa_target_can_be_compressed - 3d target needs to
be allowed for bptc formats.
---
 src/mesa/main/teximage.c   | 41 -
 src/mesa/main/texstorage.c |  1 +
 2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 3d85615..4389082 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1793,8 +1793,6 @@ GLboolean
 _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
GLenum intFormat)
 {
-   (void) intFormat;  /* not used yet */
-
switch (target) {
case GL_TEXTURE_2D:
case GL_PROXY_TEXTURE_2D:
@@ -1814,6 +1812,16 @@ _mesa_target_can_be_compressed(const struct gl_context 
*ctx, GLenum target,
case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_CUBE_MAP_ARRAY:
   return ctx-Extensions.ARB_texture_cube_map_array;
+   case GL_TEXTURE_3D:
+  switch (intFormat) {
+  case GL_COMPRESSED_RGBA_BPTC_UNORM:
+  case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+  case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+  case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+ return TRUE;
+  default:
+ return FALSE;
+  }
default:
   return GL_FALSE;
}
@@ -4575,32 +4583,23 @@ compressed_subtexture_target_check(struct gl_context 
*ctx, GLenum target,
*one of the EAC, ETC2, or RGTC formats and either border is
*non-zero, or the effective target for the texture is not
*TEXTURE_2D_ARRAY.
+   * Instead of listing all these, just list those which are allowed,
+   * which is (at this time) only bptc. Otherwise we'd say s3tc (and more)
+   * are valid here, which they are not, but of course not mentioned by
+   * core spec.
*/
   if (target != GL_TEXTURE_2D_ARRAY) {
  bool invalidformat;
  switch (format) {
 /* These came from _mesa_is_compressed_format in glformats.c. */
-/* EAC formats */
-case GL_COMPRESSED_RGBA8_ETC2_EAC:
-case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-case GL_COMPRESSED_R11_EAC:
-case GL_COMPRESSED_RG11_EAC:
-case GL_COMPRESSED_SIGNED_R11_EAC:
-case GL_COMPRESSED_SIGNED_RG11_EAC:
-/* ETC2 formats */
-case GL_COMPRESSED_RGB8_ETC2:
-case GL_COMPRESSED_SRGB8_ETC2:
-case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-/* RGTC formats */
-case GL_COMPRESSED_RED_RGTC1:
-case GL_COMPRESSED_SIGNED_RED_RGTC1:
-case GL_COMPRESSED_RG_RGTC2:
-case GL_COMPRESSED_SIGNED_RG_RGTC2:
-   invalidformat = true;
+case GL_COMPRESSED_RGBA_BPTC_UNORM:
+case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+   invalidformat = false;
break;
 default:
-   invalidformat = false;
+   invalidformat = true;
  }
  if (invalidformat) {
 _mesa_error(ctx, GL_INVALID_OPERATION,
diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index 53cb2c0..aa8fa3e 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -309,6 +309,7 @@ tex_storage_error_check(struct gl_context *ctx,
   GL_INVALID_ENUM : GL_INVALID_OPERATION,
   glTex%sStorage%dD(internalformat = %s), suffix, dims,
   _mesa_lookup_enum_by_nr(internalformat));
+  return GL_TRUE;
}
 
/* levels check */
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: fix up some texture error checks

2015-07-16 Thread Brian Paul

On 07/16/2015 03:15 PM, srol...@vmware.com wrote:

From: Roland Scheidegger srol...@vmware.com

In particular, we were incorrectly accepting s3tc (and lots of others)
for CompressedTexSubImage3D (but not CompressedTexImage3D) calls with 3d
targets. At this time, the only allowed formats for these calls are the
bptc ones, since none of the specific extensions allow it (astc hdr would).
Also, fix up a bug in _mesa_target_can_be_compressed - 3d target needs to
be allowed for bptc formats.
---
  src/mesa/main/teximage.c   | 41 -
  src/mesa/main/texstorage.c |  1 +
  2 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/src/mesa/main/teximage.c b/src/mesa/main/teximage.c
index 3d85615..4389082 100644
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -1793,8 +1793,6 @@ GLboolean
  _mesa_target_can_be_compressed(const struct gl_context *ctx, GLenum target,
 GLenum intFormat)
  {
-   (void) intFormat;  /* not used yet */
-
 switch (target) {
 case GL_TEXTURE_2D:
 case GL_PROXY_TEXTURE_2D:
@@ -1814,6 +1812,16 @@ _mesa_target_can_be_compressed(const struct gl_context 
*ctx, GLenum target,
 case GL_PROXY_TEXTURE_CUBE_MAP_ARRAY:
 case GL_TEXTURE_CUBE_MAP_ARRAY:
return ctx-Extensions.ARB_texture_cube_map_array;
+   case GL_TEXTURE_3D:
+  switch (intFormat) {
+  case GL_COMPRESSED_RGBA_BPTC_UNORM:
+  case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+  case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+  case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+ return TRUE;
+  default:
+ return FALSE;
+  }
 default:
return GL_FALSE;
 }
@@ -4575,32 +4583,23 @@ compressed_subtexture_target_check(struct gl_context 
*ctx, GLenum target,
 *one of the EAC, ETC2, or RGTC formats and either border is
 *non-zero, or the effective target for the texture is not
 *TEXTURE_2D_ARRAY.
+   * Instead of listing all these, just list those which are allowed,
+   * which is (at this time) only bptc. Otherwise we'd say s3tc (and more)
+   * are valid here, which they are not, but of course not mentioned by
+   * core spec.
 */
if (target != GL_TEXTURE_2D_ARRAY) {
   bool invalidformat;
   switch (format) {
  /* These came from _mesa_is_compressed_format in glformats.c. */
-/* EAC formats */
-case GL_COMPRESSED_RGBA8_ETC2_EAC:
-case GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC:
-case GL_COMPRESSED_R11_EAC:
-case GL_COMPRESSED_RG11_EAC:
-case GL_COMPRESSED_SIGNED_R11_EAC:
-case GL_COMPRESSED_SIGNED_RG11_EAC:
-/* ETC2 formats */
-case GL_COMPRESSED_RGB8_ETC2:
-case GL_COMPRESSED_SRGB8_ETC2:
-case GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-case GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
-/* RGTC formats */
-case GL_COMPRESSED_RED_RGTC1:
-case GL_COMPRESSED_SIGNED_RED_RGTC1:
-case GL_COMPRESSED_RG_RGTC2:
-case GL_COMPRESSED_SIGNED_RG_RGTC2:
-   invalidformat = true;
+case GL_COMPRESSED_RGBA_BPTC_UNORM:
+case GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM:
+case GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT:
+case GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT:
+   invalidformat = false;
 break;
  default:
-   invalidformat = false;
+   invalidformat = true;
   }
   if (invalidformat) {
  _mesa_error(ctx, GL_INVALID_OPERATION,
diff --git a/src/mesa/main/texstorage.c b/src/mesa/main/texstorage.c
index 53cb2c0..aa8fa3e 100644
--- a/src/mesa/main/texstorage.c
+++ b/src/mesa/main/texstorage.c
@@ -309,6 +309,7 @@ tex_storage_error_check(struct gl_context *ctx,
GL_INVALID_ENUM : GL_INVALID_OPERATION,
glTex%sStorage%dD(internalformat = %s), suffix, dims,
_mesa_lookup_enum_by_nr(internalformat));
+  return GL_TRUE;
 }

 /* levels check */



Reviewed-by: Brian Paul bri...@vmware.com

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/cs: Use dispatch width of 8 for cs terminate

2015-07-16 Thread Jordan Justen
This prevents an assertion failure in brw_fs_live_variables.cpp,
fs_live_variables::setup_one_write: Assertion `var < num_vars' failed.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
Cc: Jason Ekstrand ja...@jlekstrand.net
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 94d6a58..62dfb9a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1960,11 +1960,12 @@ fs_visitor::emit_cs_terminate()
 */
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-   bld.exec_all().MOV(payload, g0);
+   const fs_builder bld8 = bld.group(8, 0);
+   bld8.exec_all().MOV(payload, g0);
 
/* Send a message to the thread spawner to terminate the thread. */
-   fs_inst *inst = bld.exec_all()
-  .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
+   fs_inst *inst = bld8.exec_all()
+   .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
inst-eot = true;
 }
 
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/46] drirc: drop support for Heaven 3.0, fixes tessellation in 4.0

2015-07-16 Thread Marek Olšák
Thanks for the reviews.

The patches that are missing Rbs are:

23-24, 26-43, 45-46.

The patches have been on the mailing list for one month. It looks like
I won't get any more reviews. :(

Marek



On Fri, Jun 19, 2015 at 12:46 PM, Kenneth Graunke kenn...@whitecape.org wrote:
 I made some comments, but assuming those are taken care of,
 patches 1-22 are:

 Reviewed-by: Kenneth Graunke kenn...@whitecape.org

 I plan on reviewing the rest, but probably not tonight.
 Thanks for picking this up!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/12] cso: remove clip state handling

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

There is no need for this.

v2: handle redundant clip state changes in st/mesa
---
 src/gallium/auxiliary/cso_cache/cso_context.c | 44 ---
 src/gallium/auxiliary/cso_cache/cso_context.h | 13 
 src/mesa/state_tracker/st_atom_clip.c |  7 +++--
 3 files changed, 5 insertions(+), 59 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c 
b/src/gallium/auxiliary/cso_cache/cso_context.c
index 1b4e2e6..3e7fe22 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -111,9 +111,6 @@ struct cso_context {
uint render_condition_mode, render_condition_mode_saved;
boolean render_condition_cond, render_condition_cond_saved;
 
-   struct pipe_clip_state clip;
-   struct pipe_clip_state clip_saved;
-
struct pipe_framebuffer_state fb, fb_saved;
struct pipe_viewport_state vp, vp_saved;
struct pipe_blend_color blend_color;
@@ -910,47 +907,6 @@ void cso_restore_tesseval_shader(struct cso_context *ctx)
ctx-tesseval_shader_saved = NULL;
 }
 
-/* clip state */
-
-static INLINE void
-clip_state_cpy(struct pipe_clip_state *dst,
-   const struct pipe_clip_state *src)
-{
-   memcpy(dst-ucp, src-ucp, sizeof(dst-ucp));
-}
-
-static INLINE int
-clip_state_cmp(const struct pipe_clip_state *a,
-   const struct pipe_clip_state *b)
-{
-   return memcmp(a-ucp, b-ucp, sizeof(a-ucp));
-}
-
-void
-cso_set_clip(struct cso_context *ctx,
- const struct pipe_clip_state *clip)
-{
-   if (clip_state_cmp(ctx-clip, clip)) {
-  clip_state_cpy(ctx-clip, clip);
-  ctx-pipe-set_clip_state(ctx-pipe, clip);
-   }
-}
-
-void
-cso_save_clip(struct cso_context *ctx)
-{
-   clip_state_cpy(ctx-clip_saved, ctx-clip);
-}
-
-void
-cso_restore_clip(struct cso_context *ctx)
-{
-   if (clip_state_cmp(ctx-clip, ctx-clip_saved)) {
-  clip_state_cpy(ctx-clip, ctx-clip_saved);
-  ctx-pipe-set_clip_state(ctx-pipe, ctx-clip_saved);
-   }
-}
-
 enum pipe_error
 cso_set_vertex_elements(struct cso_context *ctx,
 unsigned count,
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h 
b/src/gallium/auxiliary/cso_cache/cso_context.h
index c9a4226..3bee429 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.h
+++ b/src/gallium/auxiliary/cso_cache/cso_context.h
@@ -188,19 +188,6 @@ void cso_save_render_condition(struct cso_context *cso);
 void cso_restore_render_condition(struct cso_context *cso);
 
 
-/* clip state */
-
-void
-cso_set_clip(struct cso_context *cso,
- const struct pipe_clip_state *clip);
-
-void
-cso_save_clip(struct cso_context *cso);
-
-void
-cso_restore_clip(struct cso_context *cso);
-
-
 /* sampler view state */
 
 void
diff --git a/src/mesa/state_tracker/st_atom_clip.c 
b/src/mesa/state_tracker/st_atom_clip.c
index f82c133..eb43fe5 100644
--- a/src/mesa/state_tracker/st_atom_clip.c
+++ b/src/mesa/state_tracker/st_atom_clip.c
@@ -59,8 +59,11 @@ static void update_clip( struct st_context *st )
memcpy(clip.ucp,
   use_eye ? ctx-Transform.EyeUserPlane
   : ctx-Transform._ClipUserPlane, sizeof(clip.ucp));
-   st-state.clip = clip;
-   cso_set_clip(st-cso_context, clip);
+
+   if (st-state.clip != clip) {
+  st-state.clip = clip;
+  st-pipe-set_clip_state(st-pipe, clip);
+   }
 }
 
 
-- 
2.1.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 25/46] glsl: lower gl_TessLevel* from float[n] to vecn.

2015-07-16 Thread Marek Olšák
On Sat, Jun 20, 2015 at 12:08 AM, Kenneth Graunke kenn...@whitecape.org wrote:
 On Wednesday, June 17, 2015 01:01:21 AM Marek Olšák wrote:
 From: Fabian Bieler fabianbie...@fastmail.fm

 Similar to gl_ClipDistance - gl_ClipDistanceMESA
 ---
  src/glsl/Makefile.sources   |   1 +
  src/glsl/ir_optimization.h  |   1 +
  src/glsl/link_varyings.cpp  |  51 +++-
  src/glsl/link_varyings.h|  13 +-
  src/glsl/linker.cpp |   4 +
  src/glsl/lower_tess_level.cpp   | 462 
 
  src/mesa/drivers/dri/i965/brw_context.c |   1 +
  src/mesa/main/mtypes.h  |   1 +
  8 files changed, 521 insertions(+), 13 deletions(-)
  create mode 100644 src/glsl/lower_tess_level.cpp

 diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources
 index d784a81..b3b84d6 100644
 --- a/src/glsl/Makefile.sources
 +++ b/src/glsl/Makefile.sources
 @@ -154,6 +154,7 @@ LIBGLSL_FILES = \
   lower_packed_varyings.cpp \
   lower_named_interface_blocks.cpp \
   lower_packing_builtins.cpp \
 + lower_tess_level.cpp \
   lower_texture_projection.cpp \
   lower_variable_index_to_cond_assign.cpp \
   lower_vec_index_to_cond_assign.cpp \
 diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
 index e6939f3..688a5e1 100644
 --- a/src/glsl/ir_optimization.h
 +++ b/src/glsl/ir_optimization.h
 @@ -132,6 +132,7 @@ bool optimize_split_arrays(exec_list *instructions, bool 
 linked);
  bool lower_offset_arrays(exec_list *instructions);
  void optimize_dead_builtin_variables(exec_list *instructions,
   enum ir_variable_mode other);
 +bool lower_tess_level(gl_shader *shader);

  bool lower_vertex_id(gl_shader *shader);

 diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
 index 373d337..d439b62 100644
 --- a/src/glsl/link_varyings.cpp
 +++ b/src/glsl/link_varyings.cpp
 @@ -318,7 +318,7 @@ tfeedback_decl::init(struct gl_context *ctx, const void 
 *mem_ctx,

 this-location = -1;
 this-orig_name = input;
 -   this-is_clip_distance_mesa = false;
 +   this-is_mesa_var = none;

 This seems like a bad name...is_foo suggests a boolean (rather than an
 enum), and mesa_var is pretty generic.  Perhaps calling it
 lowered_builtin_array_variable would be better?

 this-skip_components = 0;
 this-next_buffer_separator = false;
 this-matched_candidate = NULL;
 @@ -367,8 +367,15 @@ tfeedback_decl::init(struct gl_context *ctx, const void 
 *mem_ctx,
  */
 if 
 (ctx-Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerClipDistance 
 strcmp(this-var_name, gl_ClipDistance) == 0) {
 -  this-is_clip_distance_mesa = true;
 +  this-is_mesa_var = clip_distance;
 }
 +
 +   if (ctx-Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerTessLevel 
 
 +   (strcmp(this-var_name, gl_TessLevelOuter) == 0))
 +  this-is_mesa_var = tess_level_outer;
 +   if (ctx-Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerTessLevel 
 
 +   (strcmp(this-var_name, gl_TessLevelInner) == 0))
 +  this-is_mesa_var = tess_level_inner;

 Using ShaderCompilerOptions[MESA_SHADER_VERTEX] for LowerTessLevel seems
 odd - perhaps LowerClipDistance and LowerTessLevel really shouldn't be
 per-stage options, and just live in ctx-Const?  *shrug*

  }


 @@ -415,9 +422,22 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
   this-matched_candidate-type-fields.array-matrix_columns;
const unsigned vector_elements =
   this-matched_candidate-type-fields.array-vector_elements;
 -  unsigned actual_array_size = this-is_clip_distance_mesa ?
 - prog-LastClipDistanceArraySize :
 - this-matched_candidate-type-array_size();
 +  unsigned actual_array_size;
 +  switch (this-is_mesa_var) {
 +  case clip_distance:
 + actual_array_size = prog-LastClipDistanceArraySize;
 + break;
 +  case tess_level_outer:
 + actual_array_size = 4;
 + break;
 +  case tess_level_inner:
 + actual_array_size = 2;
 + break;
 +  case none:
 +  default:
 + actual_array_size = this-matched_candidate-type-array_size();
 + break;
 +  }

if (this-is_subscripted) {
   /* Check array bounds. */
 @@ -428,7 +448,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
   actual_array_size);
  return false;
   }
 - unsigned array_elem_size = this-is_clip_distance_mesa ?
 + unsigned array_elem_size = this-is_mesa_var ?
  1 : vector_elements * matrix_cols;
   fine_location += array_elem_size * this-array_subscript;
   this-size = 1;
 @@ -437,7 +457,7 @@ tfeedback_decl::assign_location(struct gl_context *ctx,
}
this-vector_elements = vector_elements;
this-matrix_columns = matrix_cols;
 -  if 

Re: [Mesa-dev] [PATCH] i965/cs: Use dispatch width of 8 for cs terminate

2015-07-16 Thread Jason Ekstrand
On Jul 16, 2015 2:00 PM, Jordan Justen jordan.l.jus...@intel.com wrote:

 This prevents an assertion failure in brw_fs_live_variables.cpp,
 fs_live_variables::setup_one_write: Assertion `var < num_vars' failed.

Best guess is that you should just fix regs_read to return the right value
(1 in this case).  Most other send instructions use mlen but that may not
be needed for CS_TERMINATE.
--Jason

 Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
 Cc: Jason Ekstrand ja...@jlekstrand.net
 ---
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ---
  1 file changed, 4 insertions(+), 3 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 94d6a58..62dfb9a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -1960,11 +1960,12 @@ fs_visitor::emit_cs_terminate()
  */
 struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
 fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 -   bld.exec_all().MOV(payload, g0);
 +   const fs_builder bld8 = bld.group(8, 0);
 +   bld8.exec_all().MOV(payload, g0);

 /* Send a message to the thread spawner to terminate the thread. */
 -   fs_inst *inst = bld.exec_all()
 -  .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
 +   fs_inst *inst = bld8.exec_all()
 +   .emit(CS_OPCODE_CS_TERMINATE, reg_undef, payload);
 inst-eot = true;
  }

 --
 2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: add a missing parameter to nvc0_set_shader_images()

2015-07-16 Thread Ilia Mirkin
Reviewed-by: Ilia Mirkin imir...@alum.mit.edu

On Thu, Jul 16, 2015 at 5:05 PM, Samuel Pitoiset
samuel.pitoi...@gmail.com wrote:
 This fixes a compilation warning introduced in commit 05a12c5
 (gallium: add interface for writable shader images).

 While we are at it, fix indentation and rename parameters according to
 the gallium interface.

 Signed-off-by: Samuel Pitoiset samuel.pitoi...@gmail.com
 ---
  src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

 diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
 b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
 index 337559c..d18b064 100644
 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
 +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
 @@ -1125,9 +1125,9 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
  }

  static void
 -nvc0_set_shader_images(struct pipe_context *pipe,
 -  unsigned start, unsigned nr,
 -  struct pipe_image_view **views)
 +nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
 +   unsigned start_slot, unsigned count,
 +   struct pipe_image_view **views)
  {
  #if 0
 nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
 --
 2.4.5

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] clover: Fix bug with computing hard_event status

2015-07-16 Thread Tom Stellard
On Sat, Jul 11, 2015 at 02:35:53PM +0300, Francisco Jerez wrote:
 Tom Stellard thomas.stell...@amd.com writes:
 
  pipe_context::flush() can return a NULL fence if the queue is already
  empty, so we should not assume that an event with a NULL fence
  has the status of CL_QUEUED.
 
 
 This seems suspicious...  On the one hand it doesn't seem to be a
 documented feature of pipe_context::flush to return NULL except in
 error conditions (I'm pretty sure other drivers like nouveau won't), and
 it seems like it could easily break assumptions of other state trackers.
 
 IMO pipe_context::flush() should respect the invariant that whatever is
 returned in the fence output argument (unless some error occurred) be a
 valid argument for pipe_screen::fence_finish() and ::fence_signalled()
 -- I don't think NULL is?
 
 On the other hand this leaves me wondering how could the queue already
 be empty when clover calls pipe_context::flush() -- I assume by queue
 you mean the pipe driver's?  The fact that clover calls
 pipe_context::flush() implies that clover's event queue is not empty
 (i.e. there have been commands enqueued to the pipe driver since the
 last call to pipe_context::flush()).  It sounds like this mismatch
 between clover's and the pipe driver's command queue might be caused by
 some race condition elsewhere?
 
 Thanks.
 

The bug appears in programs which call clFinish() without ever
adding anything to the command queue.  In this case, radeonsi
sees that no commands have been submitted to the GPU, so it doesn't
submit the fence and sets the fence parameter to NULL.

-Tom


  CC: 10.6 mesa-sta...@lists.freedesktop.org
  ---
   src/gallium/state_trackers/clover/core/event.cpp | 7 ---
   src/gallium/state_trackers/clover/core/event.hpp | 1 +
   2 files changed, 5 insertions(+), 3 deletions(-)
 
  diff --git a/src/gallium/state_trackers/clover/core/event.cpp 
  b/src/gallium/state_trackers/clover/core/event.cpp
  index d75b839..b973c78 100644
  --- a/src/gallium/state_trackers/clover/core/event.cpp
  +++ b/src/gallium/state_trackers/clover/core/event.cpp
  @@ -118,7 +118,7 @@ event::wait() const {
   hard_event::hard_event(command_queue q, cl_command_type command,
  const ref_vectorevent deps, action action) :
  event(q.context(), deps, profile(q, action), [](event ev){}),
  -   _queue(q), _command(command), _fence(NULL) {
  +   _queue(q), _command(command), _fence(NULL), _fenced(false) {
  if (q.profiling_enabled())
 _time_queued = timestamp::current(q);
   
  @@ -138,7 +138,7 @@ hard_event::status() const {
  if (event::status()  0)
 return event::status();
   
  -   else if (!_fence)
  +   else if (!_fenced)
 return CL_QUEUED;
   
  else if (!screen-fence_finish(screen, _fence, 0))
  @@ -167,7 +167,7 @@ hard_event::wait() const {
  if (status() == CL_QUEUED)
 queue()-flush();
   
  -   if (!_fence ||
  +   if (!_fenced ||
  !screen-fence_finish(screen, _fence, PIPE_TIMEOUT_INFINITE))
 throw error(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST);
   }
  @@ -196,6 +196,7 @@ void
   hard_event::fence(pipe_fence_handle *fence) {
  pipe_screen *screen = queue()-device().pipe;
  screen-fence_reference(screen, _fence, fence);
  +   _fenced = true;
   }
   
   event::action
  diff --git a/src/gallium/state_trackers/clover/core/event.hpp 
  b/src/gallium/state_trackers/clover/core/event.hpp
  index 6469e48..fac62d2 100644
  --- a/src/gallium/state_trackers/clover/core/event.hpp
  +++ b/src/gallium/state_trackers/clover/core/event.hpp
  @@ -137,6 +137,7 @@ namespace clover {
 const intrusive_refcommand_queue _queue;
 cl_command_type _command;
 pipe_fence_handle *_fence;
  +  bool _fenced;
 lazycl_ulong _time_queued, _time_submit, _time_start, _time_end;
  };
   
  -- 
  2.0.4




 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/5] i965/miptree: Cleanup some of the miptree map logic

2015-07-16 Thread Chad Versace
On Wed 15 Jul 2015, Anuj Phogat wrote:
 On Tue, Jul 14, 2015 at 9:56 AM, Ben Widawsky
 benjamin.widaw...@intel.com wrote:
  At the crux of this change is moving whether or not we can even use the 
  hardware
  blitter into the can_blit_slice check. Fundamentally this makes sense as
  blitting a slice is a subset in functionality of being able to use the 
  blitter
  at all.
 
  NOTE: I think it's bad practice to have the assert in a function that is
  determining whether or not we should use the blitter, but I tried the
  alternatives, and they look worse IMO.
 
  Signed-off-by: Ben Widawsky b...@bwidawsk.net
  ---
   src/mesa/drivers/dri/i965/intel_blit.c| 13 +
   src/mesa/drivers/dri/i965/intel_blit.h|  3 +++
   src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 27 
  +--
   3 files changed, 33 insertions(+), 10 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/intel_blit.c 
  b/src/mesa/drivers/dri/i965/intel_blit.c
  index bc39053..c4701e3 100644
  --- a/src/mesa/drivers/dri/i965/intel_blit.c
  +++ b/src/mesa/drivers/dri/i965/intel_blit.c
  @@ -241,6 +241,19 @@ intel_miptree_blit_compatible_formats(mesa_format src, 
  mesa_format dst)
  return false;
   }
 
  +bool
  +intel_miptree_can_hw_blit(struct brw_context *brw, struct 
  intel_mipmap_tree *mt)
  +{
  +   if (mt-compressed)
  +  return false;
  +
  +   /* Prior to Sandybridge, the blitter can't handle Y tiling */
  +   if (brw-gen  6  mt-tiling == I915_TILING_Y)
  +  return false;
  +
  +   return true;
  +}
  +
   /**
* Implements a rectangular block transfer (blit) of pixels between two
* miptrees.
  diff --git a/src/mesa/drivers/dri/i965/intel_blit.h 
  b/src/mesa/drivers/dri/i965/intel_blit.h
  index c3d19a5..e60dd9b 100644
  --- a/src/mesa/drivers/dri/i965/intel_blit.h
  +++ b/src/mesa/drivers/dri/i965/intel_blit.h
  @@ -50,6 +50,9 @@ intelEmitCopyBlit(struct brw_context *brw,
 
   bool intel_miptree_blit_compatible_formats(mesa_format src, mesa_format 
  dst);
 
  +bool intel_miptree_can_hw_blit(struct brw_context *brw,
  +   struct intel_mipmap_tree *mt);
  +
   bool intel_miptree_blit(struct brw_context *brw,
   struct intel_mipmap_tree *src_mt,
   int src_level, int src_slice,
  diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
  b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  index 72fba49..1330c2f 100644
  --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
  @@ -2600,9 +2600,14 @@ intel_miptree_release_map(struct intel_mipmap_tree 
  *mt,
   }
 
   static bool
  -can_blit_slice(struct intel_mipmap_tree *mt,
  +can_blit_slice(struct brw_context *brw,
  +   struct intel_mipmap_tree *mt,
  unsigned int level, unsigned int slice)
   {
  +
  +   if (!intel_miptree_can_hw_blit(brw, mt))
  +  return false;
  +
  uint32_t image_x;
  uint32_t image_y;
  intel_miptree_get_image_offset(mt, level, slice, image_x, image_y);
  @@ -2624,20 +2629,22 @@ use_intel_mipree_map_blit(struct brw_context *brw,
 unsigned int slice)
   {
  if (brw-has_llc 
  -  /* It's probably not worth swapping to the blit ring because of
  -   * all the overhead involved.
  -   */
  !(mode  GL_MAP_WRITE_BIT) 
  -   !mt-compressed 
  -   (mt-tiling == I915_TILING_X ||
  -/* Prior to Sandybridge, the blitter can't handle Y tiling */
  -(brw-gen = 6  mt-tiling == I915_TILING_Y)) 
  -   can_blit_slice(mt, level, slice))
  +   can_blit_slice(brw, mt, level, slice))
 return true;
 
  if (mt-tiling != I915_TILING_NONE 
  mt-bo-size = brw-max_gtt_map_object_size) {
  -  assert(can_blit_slice(mt, level, slice));
  +  /* XXX: This assertion is actually the final condition for platforms
  +   * without SSE4.1.  Returning false is not the right thing to do with
  +   * the current code. On those platforms, the goal of this function 
  is to give
  +   * preference to the GTT, and at this point we've determined we 
  cannot use
  +   * the GTT, and we cannot blit, so we are out of options.
  +   *
  +   * NOTE: It should be possible to actually handle the case, but 
  AFAIK, we
  +   * never get this assertion.
  +   */
  +  assert(can_blit_slice(brw, mt, level, slice));
 return true;
  }
 
  --
  2.4.5
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
 This patch now allows using the hw blitter with I915_TILING_NONE,
 which is not allowed at present for an unexplained reason. So, we
 have a bug fix in this patch. Maybe you should split this into
 two? Changes in the patch look fine to me.

The XY_SETUP_BLT instruction *does* support linear blits (at least for

[Mesa-dev] [PATCH] nvc0: add a missing parameter to nvc0_set_shader_images()

2015-07-16 Thread Samuel Pitoiset
This fixes a compilation warning introduced in commit 05a12c5
(gallium: add interface for writable shader images).

While we are at it, fix indentation and rename parameters according to
the gallium interface.

Signed-off-by: Samuel Pitoiset samuel.pitoi...@gmail.com
---
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
index 337559c..d18b064 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -1125,9 +1125,9 @@ nvc0_set_compute_resources(struct pipe_context *pipe,
 }
 
 static void
-nvc0_set_shader_images(struct pipe_context *pipe,
-  unsigned start, unsigned nr,
-  struct pipe_image_view **views)
+nvc0_set_shader_images(struct pipe_context *pipe, unsigned shader,
+   unsigned start_slot, unsigned count,
+   struct pipe_image_view **views)
 {
 #if 0
nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, views);
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 28/46] glsl: don't lower variable indexing on non-patch tessellation inputs/outputs

2015-07-16 Thread Marek Olšák
On Tue, Jun 23, 2015 at 2:04 AM, Kenneth Graunke kenn...@whitecape.org wrote:
 On Wednesday, June 17, 2015 01:01:24 AM Marek Olšák wrote:
 From: Marek Olšák marek.ol...@amd.com

 There is no way to lower them, because the array sizes are unknown
 at compile time.

 Based on a patch from: Fabian Bieler fabianbie...@fastmail.fm

 I'm a bit confused by the justification given for this patch.

 TCS/TES per-vertex inputs:
 --

 ...are always fixed-size arrays of length gl_MaxPatchVertices, because:

 The length of gl_in is equal to the implementation-dependent maximum
  patch size (gl_MaxPatchVertices).

 Similarly to the built-in inputs, each user-defined input variable has
  a value for each vertex and thus needs to be declared as arrays or
  inside input blocks declared as arrays.  Declaring an array size is
  optional.  If no size is specified, it will be taken from the
  implementation-dependent maximum patch size (gl_MaxPatchVertices).
  If a size is specified, it must match the maximum patch size;
  otherwise, a link-error will occur.

 This same text exists for both TCS inputs and TES inputs.  Since we
 always know the array size, I don't see why we can't do lowering in
 this case.

 I'm pretty new to tessellation shaders, so am I missing something?

 TCS per-patch inputs:
 -

 ...don't exist AFAICT.

 TES per-patch inputs:
 -

 ...do exist, require no special handling.

 TCS per-vertex outputs:
 ---

 ...are arrays whose size is known at link time, but not necessarily
 compile time.

 The length of gl_out is equal to the output patch size specified in the
  tessellation control shader output layout declaration.

 A tessellation control shader may also declare user-defined per-vertex
  output variables. User-defined per-vertex output variables are declared
  with the qualifier out and have a value for each vertex in the output
  patch. Such variables must be declared as arrays or inside output blocks
  declared as arrays. Declaring an array size is optional. If no size is
  specified, it will be taken from the output patch size declared in the
  shader.

 Apparently, the index must also be gl_InvocationID when writing:

 While per-vertex output variables are declared as arrays indexed by
  vertex number, each tessellation control shader invocation may write only
  to those outputs corresponding to its output patch vertex. Tessellation
  control shaders must use the input variable gl_InvocationID as the
  vertex number index when writing to per-vertex output variables.

 So we clearly don't want to do lowering on writes.  But for reads, it
 seems like we could do lowering when the array size is known (such as
 post-linking).  I'm not sure whether or not it's beneficial...

 It might be nice to add a comment explaining why it makes no sense to
 lower variable indexing on TCS output writes (with the above spec
 citation).

gl_MaxPatchVertices (typically 32) is the implementation-dependent
maximum limit. The real size is unknown at compile time. It's usually
3 or 4 in most apps though.

For TCS inputs, the size is specified by glPatchParameteri(GL_PATCH_VERTICES).
For TES inputs, the size is specified by the vertices output layout
qualifier in TCS.

The gl_PatchVerticesIn built-in uniform contains the real size.

I'll add a comment that explains it.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/46] glsl: don't lower variable indexing on non-patch tessellation inputs/outputs

2015-07-16 Thread Marek Olšák
From: Marek Olšák marek.ol...@amd.com

There is no way to lower them, because the array sizes are unknown
at compile time.

Based on a patch from: Fabian Bieler fabianbie...@fastmail.fm

v2: add comments

---
 src/glsl/ir_optimization.h   |  5 +-
 src/glsl/lower_variable_index_to_cond_assign.cpp | 58 ++--
 src/glsl/test_optpass.cpp|  3 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp |  8 ++--
 src/mesa/program/ir_to_mesa.cpp  |  2 +-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp   |  2 +-
 6 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
index 688a5e1..a174c96 100644
--- a/src/glsl/ir_optimization.h
+++ b/src/glsl/ir_optimization.h
@@ -114,8 +114,9 @@ bool lower_discard(exec_list *instructions);
 void lower_discard_flow(exec_list *instructions);
 bool lower_instructions(exec_list *instructions, unsigned what_to_lower);
 bool lower_noise(exec_list *instructions);
-bool lower_variable_index_to_cond_assign(exec_list *instructions,
-bool lower_input, bool lower_output, bool lower_temp, bool lower_uniform);
+bool lower_variable_index_to_cond_assign(gl_shader_stage stage,
+exec_list *instructions, bool lower_input, bool lower_output,
+bool lower_temp, bool lower_uniform);
 bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz);
 bool lower_const_arrays_to_uniforms(exec_list *instructions);
 bool lower_clip_distance(gl_shader *shader);
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp 
b/src/glsl/lower_variable_index_to_cond_assign.cpp
index 4a6a76c..fb7f670 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -335,12 +335,14 @@ struct switch_generator
 
 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
 public:
-   variable_index_to_cond_assign_visitor(bool lower_input,
-bool lower_output,
-bool lower_temp,
-bool lower_uniform)
+   variable_index_to_cond_assign_visitor(gl_shader_stage stage,
+ bool lower_input,
+ bool lower_output,
+ bool lower_temp,
+ bool lower_uniform)
{
   this-progress = false;
+  this-stage = stage;
   this-lower_inputs = lower_input;
   this-lower_outputs = lower_output;
   this-lower_temps = lower_temp;
@@ -348,6 +350,8 @@ public:
}
 
bool progress;
+
+   gl_shader_stage stage;
bool lower_inputs;
bool lower_outputs;
bool lower_temps;
@@ -369,18 +373,44 @@ public:
   case ir_var_auto:
   case ir_var_temporary:
 return this-lower_temps;
+
   case ir_var_uniform:
   case ir_var_shader_storage:
 return this-lower_uniforms;
+
   case ir_var_function_in:
   case ir_var_const_in:
  return this-lower_temps;
+
   case ir_var_shader_in:
+ /* The input array size is unknown at compile time for non-patch
+  * inputs in TCS and TES. The arrays are sized to
+  * the implementation-dependent limit gl_MaxPatchVertices, but
+  * the real size is stored in the gl_PatchVerticesIn built-in
+  * uniform.
+  *
+  * The TCS input array size is specified by
+  * glPatchParameteri(GL_PATCH_VERTICES).
+  *
+  * The TES input array size is specified by the vertices output
+  * layout qualifier in TCS.
+  */
+ if ((stage == MESA_SHADER_TESS_CTRL ||
+  stage == MESA_SHADER_TESS_EVAL)  !var-data.patch)
+return false;
  return this-lower_inputs;
+
   case ir_var_function_out:
+ /* TCS non-patch outputs can only be indexed with gl_InvocationID.
+  * Other expressions are not allowed.
+  */
+ if (stage == MESA_SHADER_TESS_CTRL  !var-data.patch)
+return false;
  return this-lower_temps;
+
   case ir_var_shader_out:
  return this-lower_outputs;
+
   case ir_var_function_inout:
 return this-lower_temps;
   }
@@ -523,16 +553,18 @@ public:
 } /* anonymous namespace */
 
 bool
-lower_variable_index_to_cond_assign(exec_list *instructions,
-   bool lower_input,
-   bool lower_output,
-   bool lower_temp,
-   bool lower_uniform)
+lower_variable_index_to_cond_assign(gl_shader_stage stage,
+exec_list *instructions,
+bool lower_input,
+bool lower_output,
+bool lower_temp,
+   

Re: [Mesa-dev] [PATCH 6/6] i965: Disable resource streamer in BLORP

2015-07-16 Thread Kenneth Graunke
On Friday, July 03, 2015 10:00:33 AM Abdiel Janulgue wrote:
 Switch off hardware-generated binding tables and gather push
 constants in the blorp. Blorp requires only a minimal set of
 simple constants. There is no need for the extra complexity
 to program a gather table entry into the pipeline.
 
 Cc: kenn...@whitecape.org
 Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/gen7_blorp.cpp | 2 ++
  1 file changed, 2 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
 b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
 index abace6d..9822dc1 100644
 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
 @@ -794,6 +794,8 @@ gen7_blorp_exec(struct brw_context *brw,
 }
 depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
 gen7_blorp_emit_depth_stencil_state_pointers(brw, depthstencil_offset);
 +   if (brw-use_resource_streamer)
 +  gen7_disable_hw_binding_tables(brw);
 if (params-use_wm_prog) {
uint32_t wm_surf_offset_renderbuffer;
uint32_t wm_surf_offset_texture = 0;
 

Reviewed-by: Kenneth Graunke kenn...@whitecape.org


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v3 5/6] i965: Upload binding tables in hw-generated binding table format.

2015-07-16 Thread Kenneth Graunke
On Tuesday, July 07, 2015 11:53:29 AM Abdiel Janulgue wrote:
 When hardware-generated binding tables are enabled, use the hw-generated
 binding table format when uploading binding table state.
 
 Normally, the CS will just consume the binding table pointer commands
 as pipelined state. When the RS is enabled however, the RS flushes whatever
 edited surface state entries of our on-chip binding table to the binding
 table pool before passing the command on to the CS.
 
 Note that the binding table pointer offset is relative to the binding 
 table
 pool base address when the resource streamer is enabled, instead of the surface state base 
 address.
 
 v2: Fix possible buffer overflow when allocating a chunk out of the
 hw-binding table pool (Ken).
 v3: Remove extra newline and add missing brace around if-statement (Matt).
 
 Cc: kenn...@whitecape.org
 Cc: matts...@gmail.com
 Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/brw_binding_tables.c | 72 
 --
  1 file changed, 56 insertions(+), 16 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
 b/src/mesa/drivers/dri/i965/brw_binding_tables.c
 index b3d592b..cc56dbf 100644
 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
 +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
 @@ -50,6 +50,26 @@ static const GLuint stage_to_bt_edit[MESA_SHADER_FRAGMENT 
 + 1] = {
 _3DSTATE_BINDING_TABLE_EDIT_PS,
  };
  
 +static uint32_t
 +reserve_hw_bt_space(struct brw_context *brw, unsigned bytes)
 +{
 +   if (brw-hw_bt_pool.next_offset + bytes = brw-hw_bt_pool.bo-size - 
 128) {

Why -128?  I don't see why we should have to subtract anything...

 +  gen7_reset_hw_bt_pool_offsets(brw);
 +   }
 +
 +   uint32_t offset = brw-hw_bt_pool.next_offset;
 +
 +   /* From the Haswell PRM, Volume 2b: Command Reference: Instructions,
 +* 3DSTATE_BINDING_TABLE_POINTERS_xS:
 +*
 +* If HW Binding Table is enabled, the offset is relative to the
 +*  Binding Table Pool Base Address and the alignment is 64 bytes.
 +*/
 +   brw-hw_bt_pool.next_offset += ALIGN(bytes, 64);
 +
 +   return offset;
 +}
 +
  /**
   * Upload a shader stage's binding table as indirect state.
   *
 @@ -70,30 +90,50 @@ brw_upload_binding_table(struct brw_context *brw,
  
stage_state-bind_bo_offset = 0;
 } else {
 -  /* Upload a new binding table. */
 -  if (INTEL_DEBUG  DEBUG_SHADER_TIME) {
 - brw-vtbl.emit_buffer_surface_state(
 -brw, stage_state-surf_offset[
 -prog_data-binding_table.shader_time_start],
 -brw-shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
 -brw-shader_time.bo-size, 1, true);
 +  /* When RS is enabled use hw-binding table uploads, otherwise fallback 
 to
 +   * software-uploads.
 +   */
 +  if (brw-use_resource_streamer) {
 + gen7_update_binding_table_from_array(brw, stage_state-stage,
 +  stage_state-surf_offset,
 +  prog_data-binding_table
 +  .size_bytes / 4);
 +  } else {
 + /* Upload a new binding table. */
 + if (INTEL_DEBUG  DEBUG_SHADER_TIME) {
 +brw-vtbl.emit_buffer_surface_state(
 +   brw, stage_state-surf_offset[
 +  prog_data-binding_table.shader_time_start],
 +   brw-shader_time.bo, 0, BRW_SURFACEFORMAT_RAW,
 +   brw-shader_time.bo-size, 1, true);
 + }

Doesn't this mean INTEL_DEBUG=shader_time is broken when hardware
binding tables are enabled?  Please fix.

 +
 + uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
 +  
 prog_data-binding_table.size_bytes,
 +  32,
 +  stage_state-bind_bo_offset);
 +
 + /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
 + memcpy(bind, stage_state-surf_offset,
 +prog_data-binding_table.size_bytes);
}
 -
 -  uint32_t *bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
 -   prog_data-binding_table.size_bytes, 
 32,
 -   stage_state-bind_bo_offset);
 -
 -  /* BRW_NEW_SURFACES and BRW_NEW_*_CONSTBUF */
 -  memcpy(bind, stage_state-surf_offset,
 - prog_data-binding_table.size_bytes);
 }
  
 brw-ctx.NewDriverState |= brw_new_binding_table;
  
 if (brw-gen = 7) {
 +  if (brw-use_resource_streamer) {
 + stage_state-bind_bo_offset =
 +reserve_hw_bt_space(brw, prog_data-binding_table.size_bytes);
 +  }
BEGIN_BATCH(2);
OUT_BATCH(packet_name  16 | (2 - 2));
 -  OUT_BATCH(stage_state-bind_bo_offset);
 +  /* Align SurfaceStateOffset[16:6] format to [15:5] PS Binding Table 
 field
 +   * when 

Re: [Mesa-dev] [PATCH v5 3/6] i965: Enable hardware-generated binding tables on render path.

2015-07-16 Thread Kenneth Graunke
On Tuesday, July 07, 2015 11:50:21 AM Abdiel Janulgue wrote:
 This patch implements the binding table enable command which is also
 used to allocate a binding table pool where hardware-generated
 binding table entries are flushed into. Each binding table offset in
 the binding table pool is unique per each shader stage that are
 enabled within a batch.
 
 Also insert the required brw_tracked_state objects to enable
 hw-generated binding tables in normal render path.
 
 v2: - Use MOCS in binding table pool alloc for GEN8
 - Fix spurious offset when allocating binding table pool entry
   and start from zero instead.
 v3: - Include GEN8 fix for spurious offset above.
 v4: - Fixup wrong packet length in enable/disable hw-binding table
   for GEN8 (Ville).
 - Don't invoke HW-binding table disable command when we don't
   have resource streamer (Chris).
 v5: - Reorder the state cache invalidate flush so it happens in-between
   enabling hw-generated binding tables and the previous sw-binding
   table GPU state (Chris).
 
 Cc: kenn...@whitecape.org
 Cc: syrj...@sci.fi
 Cc: ch...@chris-wilson.co.uk
 Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/brw_binding_tables.c | 96 
 ++
  src/mesa/drivers/dri/i965/brw_context.c|  4 ++
  src/mesa/drivers/dri/i965/brw_context.h|  6 ++
  src/mesa/drivers/dri/i965/brw_state.h  |  6 ++
  src/mesa/drivers/dri/i965/brw_state_upload.c   |  4 ++
  src/mesa/drivers/dri/i965/gen7_disable.c   |  4 +-
  src/mesa/drivers/dri/i965/gen8_disable.c   |  4 +-
  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
  8 files changed, 124 insertions(+), 4 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
 b/src/mesa/drivers/dri/i965/brw_binding_tables.c
 index 98ff0dd..2f32976 100644
 --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
 +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
 @@ -170,6 +170,102 @@ const struct brw_tracked_state brw_gs_binding_table = {
 .emit = brw_gs_upload_binding_table,
  };
  
 +/**
 + * Hardware-generated binding tables for the resource streamer
 + */

Comment still isn't sensible.  Perhaps

/**
 * Disable hardware binding table support, falling back to the
 * older software-generated binding table mechanism.
 */

 +void
 +gen7_disable_hw_binding_tables(struct brw_context *brw)
 +{
 +   if (!brw-use_resource_streamer)
 +  return;
 +
 +   int pkt_len = brw-gen = 8 ? 4 : 3;
 +
 +   BEGIN_BATCH(pkt_len);
 +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC  16 | (pkt_len - 2));
 +   if (brw-gen = 8) {
 +  OUT_BATCH(0);
 +  OUT_BATCH(0);
 +  OUT_BATCH(0);
 +   } else {
 +  OUT_BATCH(HSW_BT_POOL_ALLOC_MUST_BE_ONE);
 +  OUT_BATCH(0);
 +   }
 +   ADVANCE_BATCH();
 +
 +   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
 +* 3DSTATE_BINDING_TABLE_POOL_ALLOC  Programming Note:
 +*
 +* When switching between HW and SW binding table generation, SW must
 +* issue a state cache invalidate.
 +*/
 +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);

In the enabling case, Chris mentioned that the flush should go first - does this
need to be changed, too?  Or is this right, and we should disable in the
opposite order?  Chris, thoughts?

 +}
 +

/**
 * Enable hardware binding tables and set up the binding table pool.
 */

 +void
 +gen7_enable_hw_binding_tables(struct brw_context *brw)
 +{
 +   if (!brw-use_resource_streamer)
 +  return;
 +
 +   if (!brw-hw_bt_pool.bo) {
 +  /* We use a single re-usable buffer object for the lifetime of the
 +   * context and size it to maximum allowed binding tables that can be
 +   * programmed per batch:
 +   *
 +   * From the Haswell PRM, Volume 7: 3D Media GPGPU,
 +   * 3DSTATE_BINDING_TABLE_POOL_ALLOC  Programming Note:
 +   * A maximum of 16,383 Binding tables are allowed in any batch buffer
 +   */
 +  static const int max_size = 16383 * 4;
 +  brw-hw_bt_pool.bo = drm_intel_bo_alloc(brw-bufmgr, hw_bt,
 +  max_size, 64);
 +  brw-hw_bt_pool.next_offset = 0;
 +   }
 +
 +   /* From the Haswell PRM, Volume 7: 3D Media GPGPU,
 +* 3DSTATE_BINDING_TABLE_POOL_ALLOC  Programming Note:
 +*
 +* When switching between HW and SW binding table generation, SW must
 +* issue a state cache invalidate.
 +*/
 +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
 +
 +   int pkt_len = brw-gen = 8 ? 4 : 3;
 +   uint32_t dw1 = BRW_HW_BINDING_TABLE_ENABLE;
 +   if (brw-is_haswell)
 +  dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
 + HSW_BT_POOL_ALLOC_MUST_BE_ONE;
 +   else if (brw-gen = 8)
 +  dw1 |= BDW_MOCS_WB;

Multi-line statements should have braces, and the indentation is a bit
off:

   if (brw-is_haswell) {
  dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_POOL_MOCS) |
  

Re: [Mesa-dev] [PATCH 2/2] i965/cs: Use dispatch width of 8 for cs terminate payload setup

2015-07-16 Thread Jason Ekstrand
Both are

Reviewed-by: Jason Ekstrand jason.ekstr...@intel.com
On Jul 16, 2015 5:12 PM, Jordan Justen jordan.l.jus...@intel.com wrote:

 This prevents an assertion failure in brw_fs_live_variables.cpp,
 fs_live_variables::setup_one_write: Assertion `var  num_vars' failed.

 Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
 Cc: Jason Ekstrand ja...@jlekstrand.net
 ---
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 94d6a58..d6a60a7 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -1960,7 +1960,7 @@ fs_visitor::emit_cs_terminate()
  */
 struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
 fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
 -   bld.exec_all().MOV(payload, g0);
 +   bld.group(8, 0).exec_all().MOV(payload, g0);

 /* Send a message to the thread spawner to terminate the thread. */
 fs_inst *inst = bld.exec_all()
 --
 2.1.4


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/cs: Use dispatch width of 8 for cs terminate

2015-07-16 Thread Jason Ekstrand
On Jul 16, 2015 4:35 PM, Jordan Justen jordan.l.jus...@intel.com wrote:

 On 2015-07-16 13:59:45, Jason Ekstrand wrote:
 On Jul 16, 2015 2:00 PM, Jordan Justen jordan.l.jus...@intel.com
 wrote:
 
  This prevents an assertion failure in brw_fs_live_variables.cpp,
  fs_live_variables::setup_one_write: Assertion `var  num_vars'
failed.
 
 Best guess is that you should just fix regs_read to return the right
value
 (1 in this case).  Most other send instructions use mlen but that
may not
 be needed for CS_TERMINATE.

 I think regs_read will fix a similar assert in setup_one_read. The MOV
 will still hit the setup_one_write assert. Should I be generating the
 MOV differently?

 For example

bld.group(8, 0).exec_all().MOV(payload, g0);

Yes, that's how we need to do the MOV.

Out of curiosity, why are you emitting a MOV at all and not just calling
CS_TERMINATE with g0 directly?
--Jason

 works in combo with an update to regs_read.

 -Jordan

  Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
  Cc: Jason Ekstrand ja...@jlekstrand.net
  ---
   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ---
   1 file changed, 4 insertions(+), 3 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  index 94d6a58..62dfb9a 100644
  --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  @@ -1960,11 +1960,12 @@ fs_visitor::emit_cs_terminate()
   */
  struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
 BRW_REGISTER_TYPE_UD);
  fs_reg payload = fs_reg(GRF, alloc.allocate(1),
 BRW_REGISTER_TYPE_UD);
  -   bld.exec_all().MOV(payload, g0);
  +   const fs_builder bld8 = bld.group(8, 0);
  +   bld8.exec_all().MOV(payload, g0);
 
  /* Send a message to the thread spawner to terminate the
thread. */
  -   fs_inst *inst = bld.exec_all()
  -  .emit(CS_OPCODE_CS_TERMINATE, reg_undef,
 payload);
  +   fs_inst *inst = bld8.exec_all()
  +   .emit(CS_OPCODE_CS_TERMINATE, reg_undef,
 payload);
  inst-eot = true;
   }
 
  --
  2.1.4
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/cs: Use dispatch width of 8 for cs terminate

2015-07-16 Thread Jordan Justen
On 2015-07-16 14:37:59, Jason Ekstrand wrote:
On Jul 16, 2015 4:35 PM, Jordan Justen jordan.l.jus...@intel.com
wrote:

 On 2015-07-16 13:59:45, Jason Ekstrand wrote:
 On Jul 16, 2015 2:00 PM, Jordan Justen
jordan.l.jus...@intel.com
 wrote:
 
  This prevents an assertion failure in brw_fs_live_variables.cpp,
  fs_live_variables::setup_one_write: Assertion `var  num_vars'
failed.
 
 Best guess is that you should just fix regs_read to return the
right value
 (1 in this case).  Most other send instructions use mlen but that
may not
 be needed for CS_TERMINATE.

 I think regs_read will fix a similar assert in setup_one_read. The MOV
 will still hit the setup_one_write assert. Should I be generating the
 MOV differently?

 For example

bld.group(8, 0).exec_all().MOV(payload, g0);
 
Yes, that's how we need to do the MOV.
 
Out of curiosity, why are you emitting a MOV at all and not just calling
CS_TERMINATE with g0 directly?

While sending from g0 appears to work, apparently we are supposed to
do the final send from a high register. Ken found some wording that
appeared to indicate that we needed to do this even on compute:

http://article.gmane.org/gmane.comp.video.mesa3d.devel/98098

-Jordan

 works in combo with an update to regs_read.

 -Jordan

  Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
  Cc: Jason Ekstrand ja...@jlekstrand.net
  ---
   src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ---
   1 file changed, 4 insertions(+), 3 deletions(-)
 
  diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  index 94d6a58..62dfb9a 100644
  --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  @@ -1960,11 +1960,12 @@ fs_visitor::emit_cs_terminate()
   */
  struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
 BRW_REGISTER_TYPE_UD);
  fs_reg payload = fs_reg(GRF, alloc.allocate(1),
 BRW_REGISTER_TYPE_UD);
  -   bld.exec_all().MOV(payload, g0);
  +   const fs_builder bld8 = bld.group(8, 0);
  +   bld8.exec_all().MOV(payload, g0);
 
  /* Send a message to the thread spawner to terminate the
thread. */
  -   fs_inst *inst = bld.exec_all()
  -  .emit(CS_OPCODE_CS_TERMINATE, reg_undef,
 payload);
  +   fs_inst *inst = bld8.exec_all()
  +   .emit(CS_OPCODE_CS_TERMINATE, reg_undef,
 payload);
  inst-eot = true;
   }
 
  --
  2.1.4
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/cs: Use dispatch width of 8 for cs terminate payload setup

2015-07-16 Thread Jordan Justen
This prevents an assertion failure in brw_fs_live_variables.cpp,
fs_live_variables::setup_one_write: Assertion `var < num_vars' failed.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
Cc: Jason Ekstrand ja...@jlekstrand.net
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 94d6a58..d6a60a7 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1960,7 +1960,7 @@ fs_visitor::emit_cs_terminate()
 */
struct brw_reg g0 = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD);
fs_reg payload = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD);
-   bld.exec_all().MOV(payload, g0);
+   bld.group(8, 0).exec_all().MOV(payload, g0);
 
/* Send a message to the thread spawner to terminate the thread. */
fs_inst *inst = bld.exec_all()
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 91290] SIGSEGV glcpp/glcpp-parse.y:1077

2015-07-16 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=91290

Vinson Lee v...@freedesktop.org changed:

   What|Removed |Added

   Keywords||bisected

-- 
You are receiving this mail because:
You are the QA Contact for the bug.
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] i965: Push miptree tiling request into flags

2015-07-16 Thread Matt Turner
On Thu, Jul 16, 2015 at 11:49 AM, Chad Versace chad.vers...@intel.com wrote:
 On Tue 14 Jul 2015, Ben Widawsky wrote:
 With the last few patches a way was provided to influence lower layer miptree
 layout and allocation decisions via flags (replacing bools). For simplicity, 
 I
 chose not to touch the tiling requests because the change was slightly less
 mechanical than replacing the bools.

 The goal is to organize the code so we can continue to add new parameters and
 tiling types while minimizing risk to the existing code, and not having to
 constantly add new function parameters.

 v2: Rebased on Anuj's recent Yf/Ys changes
 Fix non-msrt MCS allocation (was only happening in gen8 case before)

 Cc: Anuj Phogat anuj.pho...@gmail.com
 Cc: Chad Versace chad.vers...@intel.com
 Signed-off-by: Ben Widawsky b...@bwidawsk.net

 I have one nitpick...

 -   /* 'requested' parameter of intel_miptree_create_layout() is relevant
 -* only for non bo miptree. Tiling for bo is already computed above.
 -* So, the tiling requested (INTEL_MIPTREE_TILING_ANY) below is
 -* just a place holder and will not make any change to the miptree
 -* tiling format.
 +   /* The BO already has a tiling format and we shouldn't confuse the lower
 +* layers by making it try to find a tiling format again.
  */
 +   assert((layout_flags 
 +  (MIPTREE_LAYOUT_ALLOC_ANY_TILED | MIPTREE_LAYOUT_ALLOC_LINEAR)) 
 == 0);
 layout_flags |= MIPTREE_LAYOUT_FOR_BO;

 I think the assert would be more readable if split as below. Also, the
 split version eliminates uncertainty when interpreting the assertion
 failure: it will tell exactly which offending flag is present.

 assert(layout_flags  MIPTREE_LAYOUT_ALLOC_ANY_TILED == 0);
 assert(layout_flags  MIPTREE_LAYOUT_ALLOC_LINEAR == 0);

Note that you need parentheses around the & expression. Otherwise you
actually get a & (b == c).
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/cs: Use dispatch width of 8 for cs terminate

2015-07-16 Thread Jason Ekstrand
On Jul 16, 2015 5:03 PM, Jordan Justen jordan.l.jus...@intel.com wrote:

 On 2015-07-16 14:37:59, Jason Ekstrand wrote:
 On Jul 16, 2015 4:35 PM, Jordan Justen jordan.l.jus...@intel.com
 wrote:
 
  On 2015-07-16 13:59:45, Jason Ekstrand wrote:
  On Jul 16, 2015 2:00 PM, Jordan Justen
 jordan.l.jus...@intel.com
  wrote:
  
   This prevents an assertion failure in
brw_fs_live_variables.cpp,
   fs_live_variables::setup_one_write: Assertion `var 
num_vars'
 failed.
  
  Best guess is that you should just fix regs_read to return the
 right value
  (1 in this case).  Most other send instructions use mlen but
that
 may not
  be needed for CS_TERMINATE.
 
  I think regs_read will fix a similar assert in setup_one_read. The
MOV
  will still hit the setup_one_write assert. Should I be generating
the
  MOV differently?
 
  For example
 
 bld.group(8, 0).exec_all().MOV(payload, g0);
 
 Yes, that's how we need to do the MOV.
 
 Out of curiosity, why are you emitting a MOV at all and not just
calling
 CS_TERMINATE with g0 directly?

 While sending from g0 appears to work, apparently we are supposed to
 do the final send from a high register. Ken found some wording that
 appeared to indicate that we needed to do this even on compute:

Right... That makes sense. Might be worth a comment somewhere if you don't
have one already.
--Jason

 http://article.gmane.org/gmane.comp.video.mesa3d.devel/98098

 -Jordan

  works in combo with an update to regs_read.
 
  -Jordan
 
   Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
   Cc: Jason Ekstrand ja...@jlekstrand.net
   ---
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 7 ---
1 file changed, 4 insertions(+), 3 deletions(-)
  
   diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
  b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
   index 94d6a58..62dfb9a 100644
   --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
   +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
   @@ -1960,11 +1960,12 @@ fs_visitor::emit_cs_terminate()
*/
   struct brw_reg g0 = retype(brw_vec8_grf(0, 0),
  BRW_REGISTER_TYPE_UD);
   fs_reg payload = fs_reg(GRF, alloc.allocate(1),
  BRW_REGISTER_TYPE_UD);
   -   bld.exec_all().MOV(payload, g0);
   +   const fs_builder bld8 = bld.group(8, 0);
   +   bld8.exec_all().MOV(payload, g0);
  
   /* Send a message to the thread spawner to terminate the
 thread. */
   -   fs_inst *inst = bld.exec_all()
   -  .emit(CS_OPCODE_CS_TERMINATE,
reg_undef,
  payload);
   +   fs_inst *inst = bld8.exec_all()
   +   .emit(CS_OPCODE_CS_TERMINATE,
reg_undef,
  payload);
   inst-eot = true;
}
  
   --
   2.1.4
  
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] nir: add nir_foreach_instr_safe_reverse()

2015-07-16 Thread Kenneth Graunke
From: Connor Abbott connor.w.abb...@intel.com

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
---
 src/glsl/nir/nir.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 0db1fc3..62cdbd4 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1233,6 +1233,8 @@ nir_block_last_instr(nir_block *block)
foreach_list_typed_reverse(nir_instr, instr, node, (block)-instr_list)
 #define nir_foreach_instr_safe(block, instr) \
foreach_list_typed_safe(nir_instr, instr, node, (block)-instr_list)
+#define nir_foreach_instr_safe_reverse(block, instr) \
+   foreach_list_typed_safe_reverse(nir_instr, instr, node, 
(block)-instr_list)
 
 typedef struct nir_if {
nir_cf_node cf_node;
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] nir: add the ability insert a CF node after an instruction

2015-07-16 Thread Kenneth Graunke
From: Connor Abbott connor.w.abb...@intel.com

This will split the block containing the instruction and put the CF node
in between.

v2: (by Kenneth Graunke)
- Simplify split_block_after_instr()'s implementation by using
  split_block_end() rather than duplicating code.
- Fix a bug in nir_cf_node_insert_after_instr() where inserting a
  non-block after the last instruction would cause update_if_uses()
  to be called twice, making us try to add the same SSA def to the
  if_uses list twice, corrupting the list.
- Comment changes.

Cc: Jason Ekstrand ja...@jlekstrand.net
Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/glsl/nir/nir.c | 62 ++
 src/glsl/nir/nir.h |  3 +++
 2 files changed, 65 insertions(+)

Nothing uses this yet, but I've tested it with my SIMD8 geometry shader patches,
which use this to replace emit_vertex intrinsics with if blocks (for safety
checks that make sure the program hasn't emitted too many vertices).  It seems
to work just fine, and seems like a really useful piece of infrastructure to
have, so I'm submitting it now.

Jason, would you mind reviewing it, since Connor and I both hacked on it?
It would be nice to have a non-author take a look at it :)

diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
index 78ff886..0c53bab 100644
--- a/src/glsl/nir/nir.c
+++ b/src/glsl/nir/nir.c
@@ -843,6 +843,29 @@ split_block_end(nir_block *block)
 }
 
 /**
+ * Creates a new block, and moves all the instructions after the given
+ * instruction to the new block.
+ */
+static nir_block *
+split_block_after_instr(nir_instr *instr)
+{
+   /* We don't have to do anything special for handling jump instructions,
+* as this will move the successors associated with the jump to the new
+* block already.
+*/
+   nir_block *new_block = split_block_end(instr-block);
+
+   nir_instr *cur_instr;
+   while ((cur_instr = nir_instr_next(instr)) != NULL) {
+  exec_node_remove(cur_instr-node);
+  exec_list_push_tail(new_block-instr_list, cur_instr-node);
+  cur_instr-block = new_block;
+   }
+
+   return new_block;
+}
+
+/**
  * Inserts a non-basic block between two basic blocks and links them together.
  */
 
@@ -1124,6 +1147,45 @@ nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node 
*after)
 }
 
 void
+nir_cf_node_insert_after_instr(nir_instr *instr, nir_cf_node *after)
+{
+   /* If the instruction is the last in its block, then this is equivalent
+* to inserting the CF node after this block.  Just call that, to avoid
+* attempting to split blocks unnecessarily.
+*/
+   if (nir_instr_is_last(instr)) {
+  nir_cf_node_insert_after(instr-block-cf_node, after);
+  return;
+   }
+
+   update_if_uses(after);
+
+   if (after-type == nir_cf_node_block) {
+  /* We're attempting to insert a block after an instruction; instead,
+   * just move all of the instructions into the existing block.  Actually
+   * removing and adding them would involve removing and adding uses/defs,
+   * which we don't need to do, so just take them off the list directly.
+   */
+  nir_block *after_block = nir_cf_node_as_block(after);
+  nir_foreach_instr_safe_reverse(after_block, new_instr) {
+ exec_node_remove(new_instr-node);
+ new_instr-block = instr-block;
+ exec_node_insert_after(instr-node, new_instr-node);
+  }
+   } else {
+  /* We're inserting a loop or if after an instruction.  Split up the
+   * basic block and insert it between those two blocks.
+   */
+  nir_block *before_block = instr-block;
+  nir_block *after_block = split_block_after_instr(instr);
+  insert_non_block(before_block, after, after_block);
+   }
+
+   nir_function_impl *impl = nir_cf_node_get_function(instr-block-cf_node);
+   nir_metadata_preserve(impl, nir_metadata_none);
+}
+
+void
 nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before)
 {
update_if_uses(before);
diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index 62cdbd4..6efbc18 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -1506,6 +1506,9 @@ void nir_cf_node_insert_after(nir_cf_node *node, 
nir_cf_node *after);
 /** puts a control flow node immediately before another control flow node */
 void nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before);
 
+/** puts a control flow node immediately after a given instruction */
+void nir_cf_node_insert_after_instr(nir_instr *instr, nir_cf_node *after);
+
 /** puts a control flow node at the beginning of a list from an if, loop, or 
function */
 void nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node);
 
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] nir: add nir_instr_is_first() and nir_instr_is_last() helpers

2015-07-16 Thread Kenneth Graunke
From: Connor Abbott connor.w.abb...@intel.com

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
---
 src/glsl/nir/nir.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
index e9a506c..0db1fc3 100644
--- a/src/glsl/nir/nir.h
+++ b/src/glsl/nir/nir.h
@@ -443,6 +443,18 @@ nir_instr_prev(nir_instr *instr)
   return exec_node_data(nir_instr, prev, node);
 }
 
+static inline bool
+nir_instr_is_first(nir_instr *instr)
+{
+   return exec_node_is_head_sentinel(exec_node_get_prev(instr-node));
+}
+
+static inline bool
+nir_instr_is_last(nir_instr *instr)
+{
+   return exec_node_is_tail_sentinel(exec_node_get_next(instr-node));
+}
+
 typedef struct {
/** for debugging only, can be NULL */
const char* name;
-- 
2.4.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] nir: add nir_instr_is_first() and nir_instr_is_last() helpers

2015-07-16 Thread Jason Ekstrand
R-B me too
On Jul 16, 2015 5:19 PM, Kenneth Graunke kenn...@whitecape.org wrote:

 From: Connor Abbott connor.w.abb...@intel.com

 Reviewed-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/glsl/nir/nir.h | 12 
  1 file changed, 12 insertions(+)

 diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
 index e9a506c..0db1fc3 100644
 --- a/src/glsl/nir/nir.h
 +++ b/src/glsl/nir/nir.h
 @@ -443,6 +443,18 @@ nir_instr_prev(nir_instr *instr)
return exec_node_data(nir_instr, prev, node);
  }

 +static inline bool
 +nir_instr_is_first(nir_instr *instr)
 +{
 +   return exec_node_is_head_sentinel(exec_node_get_prev(instr-node));
 +}
 +
 +static inline bool
 +nir_instr_is_last(nir_instr *instr)
 +{
 +   return exec_node_is_tail_sentinel(exec_node_get_next(instr-node));
 +}
 +
  typedef struct {
 /** for debugging only, can be NULL */
 const char* name;
 --
 2.4.5

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/3] nir: add nir_foreach_instr_safe_reverse()

2015-07-16 Thread Jason Ekstrand
R-B me too
On Jul 16, 2015 5:19 PM, Kenneth Graunke kenn...@whitecape.org wrote:

 From: Connor Abbott connor.w.abb...@intel.com

 Reviewed-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/glsl/nir/nir.h | 2 ++
  1 file changed, 2 insertions(+)

 diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
 index 0db1fc3..62cdbd4 100644
 --- a/src/glsl/nir/nir.h
 +++ b/src/glsl/nir/nir.h
 @@ -1233,6 +1233,8 @@ nir_block_last_instr(nir_block *block)
 foreach_list_typed_reverse(nir_instr, instr, node,
 (block)-instr_list)
  #define nir_foreach_instr_safe(block, instr) \
 foreach_list_typed_safe(nir_instr, instr, node, (block)-instr_list)
 +#define nir_foreach_instr_safe_reverse(block, instr) \
 +   foreach_list_typed_safe_reverse(nir_instr, instr, node,
 (block)-instr_list)

  typedef struct nir_if {
 nir_cf_node cf_node;
 --
 2.4.5

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/3] nir: add the ability insert a CF node after an instruction

2015-07-16 Thread Jason Ekstrand
On Jul 16, 2015 5:19 PM, Kenneth Graunke kenn...@whitecape.org wrote:

 From: Connor Abbott connor.w.abb...@intel.com

 This will split the block containing the instruction and put the CF node
 in between.

 v2: (by Kenneth Graunke)
 - Simplify split_block_after_instr()'s implementation by using
   split_block_end() rather than duplicating code.
 - Fix a bug in nir_cf_node_insert_after_instr() where inserting a
   non-block after the last instruction would cause update_if_uses()
   to be called twice, making us try to add the same SSA def to the
   if_uses list twice, corrupting the list.
 - Comment changes.

 Cc: Jason Ekstrand ja...@jlekstrand.net
 Signed-off-by: Kenneth Graunke kenn...@whitecape.org
 ---
  src/glsl/nir/nir.c | 62
++
  src/glsl/nir/nir.h |  3 +++
  2 files changed, 65 insertions(+)

 Nothing uses this yet, but I've tested it with my SIMD8 geometry shader
patches,
 which use this to replace emit_vertex intrinsics with if blocks (for
safety
 checks that make sure the program hasn't emitted too many vertices).  It
seems
 to work just fine, and seems like a really useful piece of infrastructure
to
 have, so I'm submitting it now.

 Jason, would you mind reviewing it, since Connor and I both hacked on it?
 It would be nice to have a non-author take a look at it :)

 diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c
 index 78ff886..0c53bab 100644
 --- a/src/glsl/nir/nir.c
 +++ b/src/glsl/nir/nir.c
 @@ -843,6 +843,29 @@ split_block_end(nir_block *block)
  }

  /**
 + * Creates a new block, and moves all the instructions after the given
 + * instruction to the new block.
 + */
 +static nir_block *
 +split_block_after_instr(nir_instr *instr)
 +{
 +   /* We don't have to do anything special for handling jump
instructions,
 +* as this will move the successors associated with the jump to the
new
 +* block already.
 +*/
 +   nir_block *new_block = split_block_end(instr-block);
 +
 +   nir_instr *cur_instr;
 +   while ((cur_instr = nir_instr_next(instr)) != NULL) {

Please use a for loop or pull the iteration expression out. This is really
obtuse.

Otherwise, at first glance this looks pretty good.  I'd like to take a
longer look before I call it reviewed though.

 +  exec_node_remove(cur_instr-node);
 +  exec_list_push_tail(new_block-instr_list, cur_instr-node);

At some point we should get a better mechanism for list splicing. Don't
bother with it now because I'm hoping to move NIR to the list in util
before too long.  We can make the change then.

 +  cur_instr-block = new_block;
 +   }
 +
 +   return new_block;
 +}
 +
 +/**
   * Inserts a non-basic block between two basic blocks and links them
together.
   */

 @@ -1124,6 +1147,45 @@ nir_cf_node_insert_after(nir_cf_node *node,
nir_cf_node *after)
  }

  void
 +nir_cf_node_insert_after_instr(nir_instr *instr, nir_cf_node *after)
 +{
 +   /* If the instruction is the last in its block, then this is
equivalent
 +* to inserting the CF node after this block.  Just call that, to
avoid
 +* attempting to split blocks unnecessarily.
 +*/
 +   if (nir_instr_is_last(instr)) {
 +  nir_cf_node_insert_after(instr-block-cf_node, after);
 +  return;
 +   }
 +
 +   update_if_uses(after);
 +
 +   if (after-type == nir_cf_node_block) {
 +  /* We're attempting to insert a block after an instruction;
instead,
 +   * just move all of the instructions into the existing block.
Actually
 +   * removing and adding them would involve removing and adding
uses/defs,
 +   * which we don't need to do, so just take them off the list
directly.
 +   */
 +  nir_block *after_block = nir_cf_node_as_block(after);
 +  nir_foreach_instr_safe_reverse(after_block, new_instr) {
 + exec_node_remove(new_instr-node);
 + new_instr-block = instr-block;
 + exec_node_insert_after(instr-node, new_instr-node);
 +  }
 +   } else {
 +  /* We're inserting a loop or if after an instruction.  Split up the
 +   * basic block and insert it between those two blocks.
 +   */
 +  nir_block *before_block = instr-block;
 +  nir_block *after_block = split_block_after_instr(instr);
 +  insert_non_block(before_block, after, after_block);
 +   }
 +
 +   nir_function_impl *impl =
nir_cf_node_get_function(instr-block-cf_node);
 +   nir_metadata_preserve(impl, nir_metadata_none);
 +}
 +
 +void
  nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before)
  {
 update_if_uses(before);
 diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
 index 62cdbd4..6efbc18 100644
 --- a/src/glsl/nir/nir.h
 +++ b/src/glsl/nir/nir.h
 @@ -1506,6 +1506,9 @@ void nir_cf_node_insert_after(nir_cf_node *node,
nir_cf_node *after);
  /** puts a control flow node immediately before another control flow
node */
  void nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before);

 +/** puts a control flow node immediately after a given instruction */

Re: [Mesa-dev] [PATCH] i965/nir/fs: removed unneeded support for global variables

2015-07-16 Thread Kenneth Graunke
On Friday, June 26, 2015 01:47:48 PM Alejandro Piñeiro wrote:
 As functions are inlined, and nir_lower_global_vars_to_local gets
 run, all global variables are lowered to local variables.
 ---
 
 Jason Ekstrand already confirmed that global support is not needed
 on the bug open for the nir/vec4 support:
 https://bugs.freedesktop.org/show_bug.cgi?id=89580#c9
 
 So this patch just applies that answer to the fs path. 
 
 Full piglit run. No regressions.
 
  src/mesa/drivers/dri/i965/brw_fs.h   |  1 -
  src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 14 ++
  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  1 -
  3 files changed, 2 insertions(+), 14 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
 b/src/mesa/drivers/dri/i965/brw_fs.h
 index 243baf6..c49d0f8 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs.h
 +++ b/src/mesa/drivers/dri/i965/brw_fs.h
 @@ -345,7 +345,6 @@ public:
 unsigned max_grf;
  
 fs_reg *nir_locals;
 -   fs_reg *nir_globals;
 fs_reg nir_inputs;
 fs_reg nir_outputs;
 fs_reg *nir_system_values;
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 index 59081ea..a648a5a 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
 @@ -55,14 +55,6 @@ fs_visitor::emit_nir_code()
  
 nir_emit_system_values(nir);
  
 -   nir_globals = ralloc_array(mem_ctx, fs_reg, nir-reg_alloc);
 -   foreach_list_typed(nir_register, reg, node, nir-registers) {
 -  unsigned array_elems =
 - reg-num_array_elems == 0 ? 1 : reg-num_array_elems;
 -  unsigned size = array_elems * reg-num_components;
 -  nir_globals[reg-index] = bld.vgrf(BRW_REGISTER_TYPE_F, size);
 -   }
 -
 /* get the main function and emit it */
 nir_foreach_overload(nir, overload) {
assert(strcmp(overload-function-name, main) == 0);
 @@ -1151,10 +1143,8 @@ fs_reg_for_nir_reg(fs_visitor *v, nir_register 
 *nir_reg,
 unsigned base_offset, nir_src *indirect)
  {
 fs_reg reg;
 -   if (nir_reg-is_global)
 -  reg = v-nir_globals[nir_reg-index];
 -   else
 -  reg = v-nir_locals[nir_reg-index];
 +

Perhaps include a sanity check:

   assert(!nir_reg-is_global);

Either way,
Reviewed-by: Kenneth Graunke kenn...@whitecape.org

 +   reg = v-nir_locals[nir_reg-index];
  
 reg = offset(reg, base_offset * nir_reg-num_components);
 if (indirect) {
 diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 index 9a4bad6..90d5706 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
 @@ -2012,7 +2012,6 @@ fs_visitor::fs_visitor(const struct brw_compiler 
 *compiler, void *log_data,
 this-no16_msg = NULL;
  
 this-nir_locals = NULL;
 -   this-nir_globals = NULL;
  
 memset(this-payload, 0, sizeof(this-payload));
 memset(this-outputs, 0, sizeof(this-outputs));
 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] nvc0: fix geometry program revalidation of clipping params

2015-07-16 Thread samuel.pitoiset

Seems reasonable.

Please, let me know the result of the full piglit run.
If everything is okay, this patch is :

Reviewed-by: Samuel Pitoiset samuel.pitoi...@gmail.com

On 13/07/2015 20:08, Ilia Mirkin wrote:

This was, btw, introduced in commit 3a8ae6ac243b (nvc0: adapt to new
clip state). Back then there was no real geometry support yet.

On Mon, Jul 13, 2015 at 2:05 PM, Ilia Mirkin imir...@alum.mit.edu wrote:

Any one which, after using a geometry shader, enables an extra clip
distance. i.e. none.

On Mon, Jul 13, 2015 at 4:16 AM, Samuel Pitoiset
samuel.pitoi...@gmail.com wrote:

What piglit test does this fix?

On Sat, Jul 11, 2015 at 7:13 PM, Ilia Mirkin imir...@alum.mit.edu wrote:

Signed-off-by: Ilia Mirkin imir...@alum.mit.edu
Cc: mesa-sta...@lists.freedesktop.org
---

Even though in practice a geometry program will never be using UCP's,
we still were revalidating (aka recompiling) the program when more
clip planes became enabled (which also are used for regular clip
distances).

This seems like it should have led to massive fail, but I guess you
don't change the number of clip planes when using geometry shaders.
But I'm going to put this through a full piglit run just in case
there's something I'm missing.

  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
index 785e52e..11f2b10 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -339,7 +339,7 @@ nvc0_check_program_ucps(struct nvc0_context *nvc0,
nvc0_vertprog_validate(nvc0);
 else
 if (likely(vp == nvc0-gmtyprog))
-  nvc0_vertprog_validate(nvc0);
+  nvc0_gmtyprog_validate(nvc0);
 else
nvc0_tevlprog_validate(nvc0);
  }
--
2.3.6

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Best regards,
Samuel Pitoiset.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/cs: Return 1 for regs_read on CS_OPCODE_CS_TERMINATE

2015-07-16 Thread Jordan Justen
This prevents an assertion failure in brw_fs_live_variables.cpp,
fs_live_variables::setup_one_read: Assertion `var < num_vars' failed.

Signed-off-by: Jordan Justen jordan.l.jus...@intel.com
Cc: Jason Ekstrand ja...@jlekstrand.net
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ff0675d..2e3eb05 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -698,6 +698,9 @@ fs_inst::regs_read(int arg) const
  return 1;
   break;
 
+   case CS_OPCODE_CS_TERMINATE:
+  return 1;
+
default:
   if (is_tex() && arg == 0 && src[0].file == GRF)
  return mlen;
-- 
2.1.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v5 2/6] i965: Enable resource streamer for the batchbuffer

2015-07-16 Thread Kenneth Graunke
On Friday, July 03, 2015 10:00:30 AM Abdiel Janulgue wrote:
 Check first if the hardware and kernel support the resource streamer. If this
 is allowed, tell the kernel to enable the resource streamer enable bit on
 MI_BATCHBUFFER_START by specifying I915_EXEC_RESOURCE_STREAMER
 execbuffer flags.
 
 v2: - Use new I915_PARAM_HAS_RESOURCE_STREAMER ioctl to check if kernel
   supports RS (Ken).
 - Add brw_device_info::has_resource_streamer and toggle it for
   Haswell, Broadwell, Cherryview, Skylake, and Broxton (Ken).
 v3: - Update I915_PARAM_HAS_RESOURCE_STREAMER to match updated kernel.
 v4: - Always inspect the getparam.value (Chris Wilson).
 v5: - Fold redundant devinfo-has_resource_streamer check in context create
   into init screen.
 
 Cc: kenn...@whitecape.org
 Cc: ch...@chris-wilson.co.uk
 Signed-off-by: Abdiel Janulgue abdiel.janul...@linux.intel.com

This patch is:
Reviewed-by: Kenneth Graunke kenn...@whitecape.org


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev